diff --git a/cmd/start-concurrent-engine.go b/cmd/start-concurrent-engine.go index 7d924ba..0370599 100644 --- a/cmd/start-concurrent-engine.go +++ b/cmd/start-concurrent-engine.go @@ -4,31 +4,37 @@ import ( "fmt" "log" "os" + "sync" + "simple-golang-crawler/engine" "simple-golang-crawler/parser" "simple-golang-crawler/persist" "simple-golang-crawler/scheduler" - "sync" + + "github.com/alexflint/go-arg" ) +var cmdArgs struct { + IdType string `arg:"-t,--type" help:"id type,support:\n\taid:\t\t下载单个视频\n\tupid\t\t下载指定up主的视频"` + Id int64 `arg:"-i,--id" help:"视频或up主的id"` +} + func main() { - itemProcessFun := persist.GetItemProcessFun() var err error - var wg sync.WaitGroup - wg.Add(1) - itemChan, err := itemProcessFun(&wg) - if err != nil { - panic(err) - } - var idType string var id int64 - var req *engine.Request - fmt.Println("Please enter your id type(`aid` or `upid`)") - fmt.Scan(&idType) - fmt.Println("Please enter your id") - fmt.Scan(&id) + arg.MustParse(&cmdArgs) + if cmdArgs.IdType == "" { + fmt.Println("Please enter your id type(`aid` or `upid`)") + fmt.Scan(&idType) + fmt.Println("Please enter your id") + fmt.Scan(&id) + } else { + idType = cmdArgs.IdType + id = cmdArgs.Id + } + var req *engine.Request if idType == "aid" { req = parser.GetRequestByAid(id) } else if idType == "upid" { @@ -38,6 +44,14 @@ func main() { os.Exit(1) } + itemProcessFun := persist.GetItemProcessFun() + var wg sync.WaitGroup + wg.Add(1) + itemChan, err := itemProcessFun(&wg) + if err != nil { + panic(err) + } + queueScheduler := scheduler.NewConcurrentScheduler() conEngine := engine.NewConcurrentEngine(30, queueScheduler, itemChan) log.Println("Start working.") diff --git a/go.mod b/go.mod index 25882a3..d78465b 100644 --- a/go.mod +++ b/go.mod @@ -3,15 +3,11 @@ module simple-golang-crawler go 1.12 require ( + github.com/alexflint/go-arg v1.4.2 github.com/go-cmd/cmd v1.2.0 + github.com/stretchr/testify v1.7.0 // indirect github.com/tidwall/gjson v1.5.0 github.com/tidwall/pretty v1.0.1 // indirect - golang.org/x/crypto v0.0.0-20200210222208-86ce3cb69678 // indirect - golang.org/x/mod v0.2.0 // indirect golang.org/x/net v0.0.0-20200202094626-16171245cfb2 - golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e // indirect - golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 // indirect golang.org/x/text v0.3.2 - golang.org/x/tools v0.0.0-20200211045251-2de505fc5306 // indirect - golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect ) diff --git a/go.sum b/go.sum index 0b64f00..dd27f47 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,20 @@ +github.com/alexflint/go-arg v1.4.2 h1:lDWZAXxpAnZUq4qwb86p/3rIJJ2Li81EoMbTMujhVa0= +github.com/alexflint/go-arg v1.4.2/go.mod h1:9iRbDxne7LcR/GSvEr7ma++GLpdIU1zrghf2y2768kM= +github.com/alexflint/go-scalar v1.0.0 h1:NGupf1XV/Xb04wXskDFzS0KWOLH632W/EO4fAFi+A70= +github.com/alexflint/go-scalar v1.0.0/go.mod h1:GpHzbCOZXEKMEcygYQ5n/aa4Aq84zbxjy3MxYW0gjYw= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-cmd/cmd v1.2.0 h1:Aohz0ZG0nQbvT4z55Mh+fdegX48GSAXL3cSsbYxRfvI= github.com/go-cmd/cmd v1.2.0/go.mod h1:XgKkd0L6sv9WcYV0FS8RfG1RJCSTVHTsLeAD2pTgHt0= +github.com/go-test/deep v1.0.5 h1:AKODKU3pDH1RzZzm6YZu77YWtEAq6uh1rLIAQlay2qc= github.com/go-test/deep v1.0.5/go.mod h1:QV8Hv/iy04NyLBxAdO9njL0iVPN1S4d/A3NVv1V36o8= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tidwall/gjson v1.5.0 h1:QCssIUI7J0RStkzIcI4A7O6P8rDA5wi5IPf70uqKSxg= github.com/tidwall/gjson v1.5.0/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= github.com/tidwall/match v1.0.1 h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc= @@ -9,25 +23,14 @@ github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhV github.com/tidwall/pretty v1.0.1 h1:WE4RBSZ1x6McVVC8S/Md+Qse8YUv6HRObAx6ke00NY8= github.com/tidwall/pretty v1.0.1/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200210222208-86ce3cb69678/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200211045251-2de505fc5306/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/parser/aid.go b/parser/aid.go index cfa1425..cbff993 100644 --- a/parser/aid.go +++ b/parser/aid.go @@ -2,6 +2,7 @@ package parser import ( "fmt" + "simple-golang-crawler/engine" "simple-golang-crawler/fetcher" "simple-golang-crawler/model" @@ -46,6 +47,9 @@ func getNewBilibiliUpSpaceReqList(pageInfo gjson.Result, upid int64) []*engine.R count := pageInfo.Get("count").Int() pn := pageInfo.Get("pn").Int() ps := pageInfo.Get("ps").Int() + if ps == 0 { + ps = 1 + } var extraPage int64 if count%ps > 0 { extraPage = 1