diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 000000000..856e932cb --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,24 @@ +run: + concurrency: 2 + timeout: 5m + +linters-settings: + goconst: + min-len: 2 + min-occurrences: 2 + +linters: + enable: + - golint + - goconst + - gofmt + - goimports + - misspell + - unparam + +issues: + exclude-use-default: false + exclude-rules: + - path: _test.go + linters: + - errcheck diff --git a/.travis.yml b/.travis.yml index 11aa9f3d4..24d13c182 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,11 @@ language: go go: - "1.13.x" +before_install: + - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin v1.24.0 + script: + - golangci-lint run - ./go.test.sh after_success: diff --git a/config/config.go b/config/config.go index 9522db9c2..61e3d82b0 100644 --- a/config/config.go +++ b/config/config.go @@ -1,62 +1,5 @@ package config -var ( - // Debug debug mode - Debug bool - // Version show version - Version bool - // InfoOnly Information only mode - InfoOnly bool - // Cookie http cookies - Cookie string - // Playlist download playlist - Playlist bool - // Refer use specified Referrer - Refer string - // Stream select specified stream to download - Stream string - // OutputPath output file path - OutputPath string - // OutputName output file name - OutputName string - // ExtractedData print extracted data - ExtractedData bool - // ChunkSizeMB HTTP chunk size for downloading (in MB) - ChunkSizeMB int - // UseAria2RPC Use Aria2 RPC to download - UseAria2RPC bool - // Aria2Token Aria2 RPC Token - Aria2Token string - // Aria2Addr Aria2 Address (default "localhost:6800") - Aria2Addr string - // Aria2Method Aria2 Method (default "http") - Aria2Method string - // ThreadNumber The number of download thread (only works for multiple-parts video) - ThreadNumber int - // File URLs file path - File string - // ItemStart Define the starting item of a playlist or a file input - ItemStart int - // ItemEnd Define the ending item of a playlist or a file input - ItemEnd int - // Items Define wanted items from a file or playlist. Separated by commas like: 1,5,6,8-10 - Items string - // File name of each bilibili episode doesn't include the playlist title - EpisodeTitleOnly bool - // Caption download captions - Caption bool - // YoukuCcode youku ccode - YoukuCcode string - // YoukuCkey youku ckey - YoukuCkey string - // YoukuPassword youku password - YoukuPassword string - // RetryTimes how many times to retry when the download failed - RetryTimes int - - MultiThread bool -) - // FakeHeaders fake http headers var FakeHeaders = map[string]string{ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", diff --git a/downloader/downloader.go b/downloader/downloader.go index 383274fb6..6bc2a49f0 100644 --- a/downloader/downloader.go +++ b/downloader/downloader.go @@ -17,11 +17,38 @@ import ( "github.com/cheggaaa/pb" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) +// Options defines options used in downloading. +type Options struct { + InfoOnly bool + Stream string + Refer string + OutputPath string + OutputName string + FileNameLength int + Caption bool + + MultiThread bool + ThreadNumber int + RetryTimes int + ChunkSizeMB int + // Aria2 + UseAria2RPC bool + Aria2Token string + Aria2Method string + Aria2Addr string +} + +// Downloader is the default downloader.
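+// It bundles the per-task Options with the progress bar state that the
+// package-level config variables used to provide. A minimal usage sketch
+// (data is a *types.Data produced by an extractor; field values are illustrative):
+//
+//	err := New(Options{ThreadNumber: 4}).Download(data)
+//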
+type Downloader struct { + bar *pb.ProgressBar + option Options +} + func progressBar(size int64) *pb.ProgressBar { bar := pb.New64(size).SetUnits(pb.U_BYTES).SetRefreshRate(time.Millisecond * 10) bar.ShowSpeed = true @@ -30,17 +57,27 @@ func progressBar(size int64) *pb.ProgressBar { return bar } -// Caption download danmaku, subtitles, etc -func Caption(url, refer, fileName, ext string) error { - if !config.Caption || config.InfoOnly { - return nil +// New returns a new Downloader implementation. +func New(option Options) *Downloader { + downloader := &Downloader{ + option: option, } + return downloader +} + +// caption downloads danmaku, subtitles, etc +func (downloader *Downloader) caption(url, fileName, ext string) error { fmt.Println("\nDownloading captions...") + + refer := downloader.option.Refer + if refer == "" { + refer = url + } body, err := request.GetByte(url, refer, nil) if err != nil { return err } - filePath, err := utils.FilePath(fileName, ext, true) + filePath, err := utils.FilePath(fileName, ext, downloader.option.FileNameLength, downloader.option.OutputPath, true) if err != nil { return err } @@ -48,7 +85,7 @@ func Caption(url, refer, fileName, ext string) error { if fileError != nil { return fileError } - defer file.Close() + defer file.Close() // nolint if _, err = file.Write(body); err != nil { return err @@ -56,16 +93,14 @@ func Caption(url, refer, fileName, ext string) error { return nil } -func writeFile( - url string, file *os.File, headers map[string]string, bar *pb.ProgressBar, -) (int64, error) { +func (downloader *Downloader) writeFile(url string, file *os.File, headers map[string]string) (int64, error) { res, err := request.Request(http.MethodGet, url, nil, headers) if err != nil { return 0, err } - defer res.Body.Close() + defer res.Body.Close() // nolint - writer := io.MultiWriter(file, bar) + writer := io.MultiWriter(file, downloader.bar) // Note that io.Copy reads 32kb(maximum) from input and writes them to output, then repeats. // So don't worry about memory. 
written, copyErr := io.Copy(writer, res.Body) @@ -75,12 +110,8 @@ func writeFile( return written, nil } -// Save save url file -func Save( - urlData URL, refer, fileName string, bar *pb.ProgressBar, chunkSizeMB int, -) error { - var err error - filePath, err := utils.FilePath(fileName, urlData.Ext, false) +func (downloader *Downloader) save(part *types.Part, fileName string) error { + filePath, err := utils.FilePath(fileName, part.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false) if err != nil { return err } @@ -88,24 +119,19 @@ func Save( if err != nil { return err } - if bar == nil { - bar = progressBar(urlData.Size) - bar.Start() - } // Skip segment file // TODO: Live video URLs will not return the size - if exists && fileSize == urlData.Size { - bar.Add64(fileSize) + if exists && fileSize == part.Size { + downloader.bar.Add64(fileSize) return nil } + tempFilePath := filePath + ".download" tempFileSize, _, err := utils.FileSize(tempFilePath) if err != nil { return err } - headers := map[string]string{ - "Referer": refer, - } + headers := make(map[string]string, 1) var ( file *os.File fileError error @@ -114,7 +140,7 @@ func Save( // range start from 0, 0-1023 means the first 1024 bytes of the file headers["Range"] = fmt.Sprintf("bytes=%d-", tempFileSize) file, fileError = os.OpenFile(tempFilePath, os.O_APPEND|os.O_WRONLY, 0644) - bar.Add64(tempFileSize) + downloader.bar.Add64(tempFileSize) } else { file, fileError = os.Create(tempFilePath) } @@ -126,16 +152,16 @@ func Save( defer func() { // must close the file before rename or it will cause // `The process cannot access the file because it is being used by another process.` error. - file.Close() + file.Close() // nolint if err == nil { - os.Rename(tempFilePath, filePath) + os.Rename(tempFilePath, filePath) // nolint } }() - if chunkSizeMB > 0 { + if downloader.option.ChunkSizeMB > 0 { var start, end, chunkSize int64 - chunkSize = int64(chunkSizeMB) * 1024 * 1024 - remainingSize := urlData.Size + chunkSize = int64(downloader.option.ChunkSizeMB) * 1024 * 1024 + remainingSize := part.Size if tempFileSize > 0 { start = tempFileSize remainingSize -= tempFileSize @@ -150,10 +176,10 @@ func Save( headers["Range"] = fmt.Sprintf("bytes=%d-%d", start, end) temp := start for i := 0; ; i++ { - written, err := writeFile(urlData.URL, file, headers, bar) + written, err := downloader.writeFile(part.URL, file, headers) if err == nil { break - } else if i+1 >= config.RetryTimes { + } else if i+1 >= downloader.option.RetryTimes { return err } temp += written @@ -165,10 +191,10 @@ func Save( } else { temp := tempFileSize for i := 0; ; i++ { - written, err := writeFile(urlData.URL, file, headers, bar) + written, err := downloader.writeFile(part.URL, file, headers) if err == nil { break - } else if i+1 >= config.RetryTimes { + } else if i+1 >= downloader.option.RetryTimes { return err } temp += written @@ -180,10 +206,8 @@ func Save( return nil } -func MultiThreadSave( - urlData URL, refer, fileName string, bar *pb.ProgressBar, chunkSizeMB, threadNum int, -) error { - filePath, err := utils.FilePath(fileName, urlData.Ext, false) +func (downloader *Downloader) multiThreadSave(dataPart *types.Part, fileName string) error { + filePath, err := utils.FilePath(fileName, dataPart.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false) if err != nil { return err } @@ -191,14 +215,11 @@ func MultiThreadSave( if err != nil { return err } - if bar == nil { - bar = progressBar(urlData.Size) - bar.Start() - } + // Skip 
segment file // TODO: Live video URLs will not return the size - if exists && fileSize == urlData.Size { - bar.Add64(fileSize) + if exists && fileSize == dataPart.Size { + downloader.bar.Add64(fileSize) return nil } tmpFilePath := filePath + ".download" @@ -207,19 +228,18 @@ func MultiThreadSave( return err } if tmpExists { - if tmpFileSize == urlData.Size { - bar.Add64(urlData.Size) + if tmpFileSize == dataPart.Size { + downloader.bar.Add64(dataPart.Size) return os.Rename(tmpFilePath, filePath) - } else { - err = os.Remove(tmpFilePath) - if err != nil { - return err - } + } + + if err = os.Remove(tmpFilePath); err != nil { + return err } } // Scan all parts - parts, err := readDirAllFilePart(filePath, fileName, urlData.Ext) + parts, err := readDirAllFilePart(filePath, fileName, dataPart.Ext) if err != nil { return err } @@ -259,11 +279,11 @@ func MultiThreadSave( } lastEnd = part.End } - if lastEnd != urlData.Size-1 { + if lastEnd != dataPart.Size-1 { newPart := &FilePartMeta{ Index: parts[len(parts)-1].Index + 1, Start: lastEnd + 1, - End: urlData.Size - 1, + End: dataPart.Size - 1, Cur: lastEnd + 1, } parts = append(parts, newPart) @@ -272,14 +292,14 @@ func MultiThreadSave( } else { var start, end, partSize int64 var i float32 - partSize = urlData.Size / int64(threadNum) + partSize = dataPart.Size / int64(downloader.option.ThreadNumber) i = 0 - for start < urlData.Size { + for start < dataPart.Size { end = start + partSize - 1 - if end > urlData.Size { - end = urlData.Size - 1 - } else if int(i+1) == threadNum && end < urlData.Size { - end = urlData.Size - 1 + if end > dataPart.Size { + end = dataPart.Size - 1 + } else if int(i+1) == downloader.option.ThreadNumber && end < dataPart.Size { + end = dataPart.Size - 1 } part := &FilePartMeta{ Index: i, @@ -294,13 +314,13 @@ func MultiThreadSave( } } if savedSize > 0 { - bar.Add64(savedSize) - if savedSize == urlData.Size { + downloader.bar.Add64(savedSize) + if savedSize == dataPart.Size { return mergeMultiPart(filePath, parts) } } - wgp := utils.NewWaitGroupPool(threadNum) + wgp := utils.NewWaitGroupPool(downloader.option.ThreadNumber) var errs []error for _, part := range unfinishedPart { wgp.Add() @@ -311,17 +331,16 @@ func MultiThreadSave( return } defer func() { - file.Close() + file.Close() // nolint wgp.Done() }() + var end, chunkSize int64 - headers := map[string]string{ - "Referer": refer, - } - if chunkSizeMB <= 0 { + headers := make(map[string]string, 1) + if downloader.option.ChunkSizeMB <= 0 { chunkSize = part.End - part.Start + 1 } else { - chunkSize = int64(chunkSizeMB) * 1024 * 1024 + chunkSize = int64(downloader.option.ChunkSizeMB) * 1024 * 1024 } end = computeEnd(part.Cur, chunkSize, part.End) remainingSize := part.End - part.Cur + 1 @@ -338,11 +357,11 @@ func MultiThreadSave( headers["Range"] = fmt.Sprintf("bytes=%d-%d", part.Cur, end) temp := part.Cur for i := 0; ; i++ { - written, err := writeFile(urlData.URL, file, headers, bar) + written, err := downloader.writeFile(dataPart.URL, file, headers) if err == nil { remainingSize -= chunkSize break - } else if i+1 >= config.RetryTimes { + } else if i+1 >= downloader.option.RetryTimes { errs = append(errs, err) return } @@ -379,7 +398,7 @@ func readDirAllFilePart(filePath, filename, extname string) ([]*FilePartMeta, er if err != nil { return nil, err } - defer dir.Close() + defer dir.Close() // nolint fns, err := dir.Readdir(0) if err != nil { return nil, err @@ -408,14 +427,14 @@ func parseFilePartMeta(filepath string, fileSize int64) (*FilePartMeta, error) { if err != nil 
{ return nil, err } - defer file.Close() + defer file.Close() // nolint var buf [512]byte readSize, err := file.ReadAt(buf[0:size], 0) if err != nil && err != io.EOF { return nil, err } if readSize < size { - return nil, fmt.Errorf("The file has been broked, please delete all part files and re-download.\n") + return nil, fmt.Errorf("the file has been broken, please delete all part files and re-download") } err = binary.Read(bytes.NewBuffer(buf[:size]), binary.LittleEndian, meta) if err != nil { return nil, err } @@ -439,8 +458,8 @@ func mergeMultiPart(filepath string, parts []*FilePartMeta) error { var partFiles []*os.File defer func() { for _, f := range partFiles { - f.Close() - os.Remove(f.Name()) + f.Close() // nolint + os.Remove(f.Name()) // nolint } }() for _, part := range parts { @@ -458,88 +477,89 @@ func mergeMultiPart(filepath string, parts []*FilePartMeta) error { return err } } - tempFile.Close() + tempFile.Close() // nolint err = os.Rename(tempFilePath, filepath) return err } +func (downloader *Downloader) aria2(title string, stream *types.Stream) error { + rpcData := Aria2RPCData{ + JSONRPC: "2.0", + ID: "annie", // can be modified + Method: "aria2.addUri", + } + rpcData.Params[0] = "token:" + downloader.option.Aria2Token + var urls []string + for _, p := range stream.Parts { + urls = append(urls, p.URL) + } + var inputs Aria2Input + inputs.Header = append(inputs.Header, "Referer: "+downloader.option.Refer) + for i := range urls { + rpcData.Params[1] = urls[i : i+1] + inputs.Out = fmt.Sprintf("%s[%d].%s", title, i, stream.Parts[0].Ext) + rpcData.Params[2] = &inputs + jsonData, err := json.Marshal(rpcData) + if err != nil { + return err + } + reqURL := fmt.Sprintf("%s://%s/jsonrpc", downloader.option.Aria2Method, downloader.option.Aria2Addr) + req, err := http.NewRequest(http.MethodPost, reqURL, bytes.NewBuffer(jsonData)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + var client = http.Client{Timeout: 30 * time.Second} + res, err := client.Do(req) + if err != nil { + return err + } + // The http Client and Transport guarantee that Body is always + // non-nil, even on responses without a body or responses with + // a zero-length body.
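+ // It must still be closed, though, to avoid leaking the underlying connection.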
+ res.Body.Close() // nolint + } + return nil +} + // Download download urls -func Download(v Data, refer string, chunkSizeMB int) error { - v.genSortedStreams() - var ( - title string - stream string - ) - if config.OutputName == "" { - title = utils.FileName(v.Title, "") - } else { - title = utils.FileName(config.OutputName, "") +func (downloader *Downloader) Download(data *types.Data) error { + sortedStreams := genSortedStreams(data.Streams) + if downloader.option.InfoOnly { + printInfo(data, sortedStreams) + return nil } - if config.Stream == "" { - stream = v.sortedStreams[0].name - } else { - stream = config.Stream + + title := downloader.option.OutputName + if title == "" { + title = data.Title } - data, ok := v.Streams[stream] + title = utils.FileName(title, "", downloader.option.FileNameLength) + + streamName := downloader.option.Stream + if streamName == "" { + streamName = sortedStreams[0].ID + } + stream, ok := data.Streams[streamName] if !ok { - return fmt.Errorf("no stream named %s", stream) + return fmt.Errorf("no stream named %s", streamName) } - v.printInfo(stream) // if InfoOnly, this func will print all streams info - if config.InfoOnly { - return nil + + printStreamInfo(data, stream) + + // download caption + if downloader.option.Caption && data.Caption != nil { + downloader.caption(data.Caption.URL, title, data.Caption.Ext) // nolint } - // Use aria2 rpc to download - if config.UseAria2RPC { - rpcData := Aria2RPCData{ - JSONRPC: "2.0", - ID: "annie", // can be modified - Method: "aria2.addUri", - } - rpcData.Params[0] = "token:" + config.Aria2Token - var urls []string - for _, p := range data.URLs { - urls = append(urls, p.URL) - } - var inputs Aria2Input - inputs.Header = append(inputs.Header, "Referer: "+refer) - for i := range urls { - rpcData.Params[1] = urls[i : i+1] - inputs.Out = fmt.Sprintf("%s[%d].%s", title, i, data.URLs[0].Ext) - rpcData.Params[2] = &inputs - jsonData, err := json.Marshal(rpcData) - if err != nil { - return err - } - reqURL := fmt.Sprintf("%s://%s/jsonrpc", config.Aria2Method, config.Aria2Addr) - req, err := http.NewRequest(http.MethodPost, reqURL, bytes.NewBuffer(jsonData)) - if err != nil { - return err - } - req.Header.Set("Content-Type", "application/json") - var client = http.Client{Timeout: 30 * time.Second} - res, err := client.Do(req) - if err != nil { - return err - } - // The http Client and Transport guarantee that Body is always - // non-nil, even on responses without a body or responses with - // a zero-length body. 
- res.Body.Close() - } - return nil + // Use aria2 rpc to download + if downloader.option.UseAria2RPC { + return downloader.aria2(title, stream) } // Skip the complete file that has been merged - var ( - mergedFilePath string - err error - ) - if v.Site == "YouTube youtube.com" { - mergedFilePath, err = utils.FilePath(title, data.URLs[0].Ext, false) - } else { - mergedFilePath, err = utils.FilePath(title, "mp4", false) - } + mergedFilePath, err := utils.FilePath(title, stream.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false) if err != nil { return err } @@ -552,67 +572,66 @@ func Download(v Data, refer string, chunkSizeMB int) error { fmt.Printf("%s: file already exists, skipping\n", mergedFilePath) return nil } - bar := progressBar(data.Size) - bar.Start() - if len(data.URLs) == 1 { + + downloader.bar = progressBar(stream.Size) + downloader.bar.Start() + if len(stream.Parts) == 1 { // only one fragment var err error - if config.MultiThread { - err = MultiThreadSave(data.URLs[0], refer, title, bar, chunkSizeMB, config.ThreadNumber) + if downloader.option.MultiThread { + err = downloader.multiThreadSave(stream.Parts[0], title) } else { - err = Save(data.URLs[0], refer, title, bar, chunkSizeMB) + err = downloader.save(stream.Parts[0], title) } if err != nil { return err } - bar.Finish() + downloader.bar.Finish() return nil } - wgp := utils.NewWaitGroupPool(config.ThreadNumber) + + wgp := utils.NewWaitGroupPool(downloader.option.ThreadNumber) // multiple fragments errs := make([]error, 0) lock := sync.Mutex{} - parts := make([]string, len(data.URLs)) - for index, url := range data.URLs { + parts := make([]string, len(stream.Parts)) + for index, part := range stream.Parts { if len(errs) > 0 { break } partFileName := fmt.Sprintf("%s[%d]", title, index) - partFilePath, err := utils.FilePath(partFileName, url.Ext, false) + partFilePath, err := utils.FilePath(partFileName, part.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false) if err != nil { return err } parts[index] = partFilePath wgp.Add() - go func(url URL, refer, fileName string, bar *pb.ProgressBar) { + go func(part *types.Part, fileName string) { defer wgp.Done() - err := Save(url, refer, fileName, bar, chunkSizeMB) + err := downloader.save(part, fileName) if err != nil { lock.Lock() errs = append(errs, err) lock.Unlock() } - }(url, refer, partFileName, bar) + }(part, partFileName) } wgp.Wait() if len(errs) > 0 { return errs[0] } - bar.Finish() + downloader.bar.Finish() - if v.Type != "video" { + if data.Type != types.DataTypeVideo { return nil } - // merge + fmt.Printf("Merging video parts into %s\n", mergedFilePath) - if v.Site == "YouTube youtube.com" { - err = utils.MergeAudioAndVideo(parts, mergedFilePath) - } else { - err = utils.MergeToMP4(parts, mergedFilePath, title) + if stream.Ext == "mp4" { + return utils.MergeToMP4(parts, mergedFilePath, title) } - - return err + return utils.MergeFilesWithSameExtension(parts, mergedFilePath) } diff --git a/downloader/downloader_test.go b/downloader/downloader_test.go index 6a9a32260..c13f89e7f 100644 --- a/downloader/downloader_test.go +++ b/downloader/downloader_test.go @@ -1,31 +1,26 @@ package downloader import ( - // "os" "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" ) -func init() { - config.RetryTimes = 100 - config.ThreadNumber = 1 -} - func TestDownload(t *testing.T) { testCases := []struct { name string - data Data + data *types.Data }{ { name: "normal test", - data: Data{ + data: 
&types.Data{ Site: "douyin", Title: "test", - Type: "video", - Streams: map[string]Stream{ + Type: types.DataTypeVideo, + Streams: map[string]*types.Stream{ "default": { - URLs: []URL{ + ID: "default", + Parts: []*types.Part{ { URL: "https://aweme.snssdk.com/aweme/v1/playwm/?video_id=v0200f9a0000bc117isuatl67cees890&line=0", Size: 4927877, @@ -38,13 +33,14 @@ func TestDownload(t *testing.T) { }, { name: "multi-stream test", - data: Data{ + data: &types.Data{ Site: "douyin", Title: "test2", - Type: "video", - Streams: map[string]Stream{ + Type: types.DataTypeVideo, + Streams: map[string]*types.Stream{ "miaopai": { - URLs: []URL{ + ID: "miaopai", + Parts: []*types.Part{ { URL: "https://txycdn.miaopai.com/stream/KwR26jUGh2ySnVjYbQiFmomNjP14LtMU3vi6sQ__.mp4?ssig=6594aa01a78e78f50c65c164d186ba9e&time_stamp=1537070910786", Size: 4011590, @@ -54,7 +50,8 @@ func TestDownload(t *testing.T) { Size: 4011590, }, "douyin": { - URLs: []URL{ + ID: "douyin", + Parts: []*types.Part{ { URL: "https://aweme.snssdk.com/aweme/v1/playwm/?video_id=v0200f9a0000bc117isuatl67cees890&line=0", Size: 4927877, @@ -68,13 +65,14 @@ func TestDownload(t *testing.T) { }, { name: "image test", - data: Data{ + data: &types.Data{ Site: "bcy", Title: "bcy image test", - Type: "image", - Streams: map[string]Stream{ + Type: types.DataTypeImage, + Streams: map[string]*types.Stream{ "default": { - URLs: []URL{ + ID: "default", + Parts: []*types.Part{ { URL: "http://img5.bcyimg.com/coser/143767/post/c0j7x/0d713eb41a614053ac6a3b146914f6bc.jpg/w650", Size: 56107, @@ -92,7 +90,7 @@ func TestDownload(t *testing.T) { }, } for _, testCase := range testCases { - err := Download(testCase.data, "", 10) + err := New(Options{}).Download(testCase.data) if err != nil { t.Error(err) } diff --git a/downloader/types.go b/downloader/types.go index 7ba7c880c..b0f0c9f18 100644 --- a/downloader/types.go +++ b/downloader/types.go @@ -1,124 +1,6 @@ package downloader -import ( - "fmt" - "sort" - - "github.com/fatih/color" - - "github.com/iawia002/annie/config" -) - -// URL data struct for single URL information -type URL struct { - URL string `json:"url"` - Size int64 `json:"size"` - Ext string `json:"ext"` -} - -// Stream data struct for each stream -type Stream struct { - // [URL: {URL, Size, Ext}, ...] - // Some video files have multiple fragments - // and support for downloading multiple image files at once - URLs []URL `json:"urls"` - Quality string `json:"quality"` - // total size of all urls - Size int64 `json:"size"` - - // name used in sortedStreams - name string -} - -// Data data struct for video information -type Data struct { - Site string `json:"site"` - Title string `json:"title"` - Type string `json:"type"` - // each stream has it's own URLs and Quality - Streams map[string]Stream `json:"streams"` - sortedStreams []Stream - - // Err is used to record whether an error occurred when extracting data. - // It is used to record the error information corresponding to each url when extracting the list data. 
- // NOTE(iawia002): err is only used in Data list - Err error `json:"-"` - // URL is used to record the address of this download - URL string `json:"url"` -} - -// EmptyData returns an "empty" Data object with the given URL and error -func EmptyData(url string, err error) Data { - return Data{ - URL: url, - Err: err, - } -} - -func (data *Stream) calculateTotalSize() { - var size int64 - for _, urlData := range data.URLs { - size += urlData.Size - } - data.Size = size -} - -func (data Stream) printStream() { - blue := color.New(color.FgBlue) - cyan := color.New(color.FgCyan) - blue.Println(fmt.Sprintf(" [%s] -------------------", data.name)) - if data.Quality != "" { - cyan.Printf(" Quality: ") - fmt.Println(data.Quality) - } - cyan.Printf(" Size: ") - if data.Size == 0 { - data.calculateTotalSize() - } - fmt.Printf("%.2f MiB (%d Bytes)\n", float64(data.Size)/(1024*1024), data.Size) - cyan.Printf(" # download with: ") - fmt.Printf("annie -f %s ...\n\n", data.name) -} - -func (v *Data) genSortedStreams() { - for k, data := range v.Streams { - if data.Size == 0 { - data.calculateTotalSize() - } - data.name = k - v.Streams[k] = data - v.sortedStreams = append(v.sortedStreams, data) - } - if len(v.Streams) > 1 { - sort.Slice( - v.sortedStreams, func(i, j int) bool { return v.sortedStreams[i].Size > v.sortedStreams[j].Size }, - ) - } -} - -func (v *Data) printInfo(stream string) { - cyan := color.New(color.FgCyan) - fmt.Println() - cyan.Printf(" Site: ") - fmt.Println(v.Site) - cyan.Printf(" Title: ") - fmt.Println(v.Title) - cyan.Printf(" Type: ") - fmt.Println(v.Type) - if config.InfoOnly { - cyan.Printf(" Streams: ") - fmt.Println("# All available quality") - for _, data := range v.sortedStreams { - data.printStream() - } - } else { - cyan.Printf(" Stream: ") - fmt.Println() - v.Streams[stream].printStream() - } -} - -// Aria2RPCData json RPC 2.0 for Aria2 +// Aria2RPCData defines the data structure of json RPC 2.0 info for Aria2 type Aria2RPCData struct { // More info about RPC interface please refer to // https://aria2.github.io/manual/en/html/aria2c.html#rpc-interface @@ -130,7 +12,7 @@ type Aria2RPCData struct { Params [3]interface{} `json:"params"` } -// Aria2Input options for `aria2.addUri` +// Aria2Input is options for `aria2.addUri` // https://aria2.github.io/manual/en/html/aria2c.html#id3 type Aria2Input struct { // The file name of the downloaded file @@ -139,6 +21,7 @@ type Aria2Input struct { Header []string `json:"header"` } +// FilePartMeta defines the data structure of file meta info. 
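+// The meta is written with binary.LittleEndian at the head of every
+// ".download" part file (see parseFilePartMeta in downloader.go), which is
+// what lets an interrupted multi-thread download be resumed from Cur.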
type FilePartMeta struct { Index float32 Start int64 diff --git a/downloader/utils.go b/downloader/utils.go new file mode 100644 index 000000000..fba9ea58c --- /dev/null +++ b/downloader/utils.go @@ -0,0 +1,68 @@ +package downloader + +import ( + "fmt" + "sort" + + "github.com/fatih/color" + + "github.com/iawia002/annie/extractors/types" +) + +var ( + blue = color.New(color.FgBlue) + cyan = color.New(color.FgCyan) +) + +func genSortedStreams(streams map[string]*types.Stream) []*types.Stream { + sortedStreams := make([]*types.Stream, 0, len(streams)) + for _, data := range streams { + sortedStreams = append(sortedStreams, data) + } + if len(sortedStreams) > 1 { + sort.Slice( + sortedStreams, func(i, j int) bool { return sortedStreams[i].Size > sortedStreams[j].Size }, + ) + } + return sortedStreams +} + +func printHeader(data *types.Data) { + fmt.Println() + cyan.Printf(" Site: ") // nolint + fmt.Println(data.Site) + cyan.Printf(" Title: ") // nolint + fmt.Println(data.Title) + cyan.Printf(" Type: ") // nolint + fmt.Println(data.Type) +} + +func printStream(stream *types.Stream) { + blue.Println(fmt.Sprintf(" [%s] -------------------", stream.ID)) // nolint + if stream.Quality != "" { + cyan.Printf(" Quality: ") // nolint + fmt.Println(stream.Quality) + } + cyan.Printf(" Size: ") // nolint + fmt.Printf("%.2f MiB (%d Bytes)\n", float64(stream.Size)/(1024*1024), stream.Size) + cyan.Printf(" # download with: ") // nolint + fmt.Printf("annie -f %s ...\n\n", stream.ID) +} + +func printInfo(data *types.Data, sortedStreams []*types.Stream) { + printHeader(data) + + cyan.Printf(" Streams: ") // nolint + fmt.Println("# All available quality") + for _, stream := range sortedStreams { + printStream(stream) + } +} + +func printStreamInfo(data *types.Data, stream *types.Stream) { + printHeader(data) + + cyan.Printf(" Stream: ") // nolint + fmt.Println() + printStream(stream) +} diff --git a/extractors/bcy/bcy.go b/extractors/bcy/bcy.go index 8617998ce..a4958915c 100644 --- a/extractors/bcy/bcy.go +++ b/extractors/bcy/bcy.go @@ -5,8 +5,7 @@ import ( "fmt" "strings" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/parser" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" @@ -22,8 +21,15 @@ type bcyData struct { } `json:"detail"` } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a bcy extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
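+// Call-site sketch, mirroring the updated tests (URL elided):
+//
+//	data, err := New().Extract("https://bcy.net/item/detail/...", types.Options{})
+//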
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { html, err := request.Get(url, url, nil) if err != nil { return nil, err @@ -33,7 +39,7 @@ func Extract(url string) ([]downloader.Data, error) { rep := strings.NewReplacer(`\"`, `"`, `\\`, `\`) realURLs := utils.MatchOneOf(html, `JSON.parse\("(.+?)"\);`) if realURLs == nil || len(realURLs) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } jsonString := rep.Replace(realURLs[1]) @@ -48,7 +54,7 @@ func Extract(url string) ([]downloader.Data, error) { } title := strings.Replace(parser.Title(doc), " - 半次元 banciyuan - ACG爱好者社区", "", -1) - urls := make([]downloader.URL, 0, len(data.Detail.PostData.Multi)) + parts := make([]*types.Part, 0, len(data.Detail.PostData.Multi)) var totalSize int64 for _, img := range data.Detail.PostData.Multi { size, err := request.Size(img.OriginalPath, url) @@ -60,23 +66,23 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urls = append(urls, downloader.URL{ + parts = append(parts, &types.Part{ URL: img.OriginalPath, Size: size, Ext: ext, }) } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: urls, - Size: totalSize, + Parts: parts, + Size: totalSize, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "半次元 bcy.net", Title: title, - Type: "image", + Type: types.DataTypeImage, Streams: streams, URL: url, }, diff --git a/extractors/bcy/bcy_test.go b/extractors/bcy/bcy_test.go index b785da7d1..fd021d58c 100644 --- a/extractors/bcy/bcy_test.go +++ b/extractors/bcy/bcy_test.go @@ -3,13 +3,11 @@ package bcy import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 100 tests := []struct { name string args test.Args @@ -25,7 +23,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/bilibili/bilibili.go b/extractors/bilibili/bilibili.go index 05627bb33..231b3978a 100644 --- a/extractors/bilibili/bilibili.go +++ b/extractors/bilibili/bilibili.go @@ -7,9 +7,7 @@ import ( "strconv" "strings" - "github.com/iawia002/annie/config" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/parser" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" @@ -32,13 +30,13 @@ const referer = "https://www.bilibili.com" var utoken string -func genAPI(aid, cid int, bangumi bool, quality string, seasonType string) (string, error) { +func genAPI(aid, cid int, bangumi bool, quality, seasonType, cookie string) (string, error) { var ( err error baseAPIURL string params string ) - if config.Cookie != "" && utoken == "" { + if cookie != "" && utoken == "" { utoken, err = request.Get( fmt.Sprintf("%said=%d&cid=%d", bilibiliTokenAPI, aid, cid), referer, @@ -83,18 +81,18 @@ func genAPI(aid, cid int, bangumi bool, quality string, seasonType string) (stri return api, nil } -func genURL(durl []dURLData) ([]downloader.URL, int64) { +func genParts(durl []dURLData) ([]*types.Part, int64) { var size int64 - urls := make([]downloader.URL, len(durl)) + parts := make([]*types.Part, 
len(durl)) for index, data := range durl { size += data.Size - urls[index] = downloader.URL{ + parts[index] = &types.Part{ URL: data.URL, Size: data.Size, Ext: "flv", } } - return urls, size + return parts, size } type bilibiliOptions struct { @@ -105,16 +103,17 @@ type bilibiliOptions struct { cid int page int subtitle string + cookie string } -func extractBangumi(url, html string) ([]downloader.Data, error) { +func extractBangumi(url, html string, extractOption types.Options) ([]*types.Data, error) { dataString := utils.MatchOneOf(html, `window.__INITIAL_STATE__=(.+?);\(function`)[1] var data bangumiData err := json.Unmarshal([]byte(dataString), &data) if err != nil { return nil, err } - if !config.Playlist { + if !extractOption.Playlist { options := bilibiliOptions{ url: url, html: html, @@ -122,13 +121,13 @@ func extractBangumi(url, html string) ([]downloader.Data, error) { aid: data.EpInfo.Aid, cid: data.EpInfo.Cid, } - return []downloader.Data{bilibiliDownload(options)}, nil + return []*types.Data{bilibiliDownload(options, extractOption)}, nil } // handle bangumi playlist - needDownloadItems := utils.NeedDownloadList(len(data.EpList)) - extractedData := make([]downloader.Data, len(needDownloadItems)) - wgp := utils.NewWaitGroupPool(config.ThreadNumber) + needDownloadItems := utils.NeedDownloadList(extractOption.Items, extractOption.ItemStart, extractOption.ItemEnd, len(data.EpList)) + extractedData := make([]*types.Data, len(needDownloadItems)) + wgp := utils.NewWaitGroupPool(extractOption.ThreadNumber) dataIndex := 0 for index, u := range data.EpList { if !utils.ItemInSlice(index+1, needDownloadItems) { @@ -146,9 +145,9 @@ func extractBangumi(url, html string) ([]downloader.Data, error) { aid: u.Aid, cid: u.Cid, } - go func(index int, options bilibiliOptions, extractedData []downloader.Data) { + go func(index int, options bilibiliOptions, extractedData []*types.Data) { defer wgp.Done() - extractedData[index] = bilibiliDownload(options) + extractedData[index] = bilibiliDownload(options, extractOption) }(dataIndex, options, extractedData) dataIndex++ } @@ -171,12 +170,12 @@ func getMultiPageData(html string) (*multiPage, error) { return &data, nil } -func extractNormalVideo(url, html string) ([]downloader.Data, error) { +func extractNormalVideo(url, html string, extractOption types.Options) ([]*types.Data, error) { pageData, err := getMultiPageData(html) if err != nil { return nil, err } - if !config.Playlist { + if !extractOption.Playlist { // handle URL that has a playlist, mainly for unified titles //
<h1>
tag does not include subtitles // bangumi doesn't need this @@ -191,7 +190,7 @@ } if len(pageData.VideoData.Pages) < p || p < 1 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } page := pageData.VideoData.Pages[p-1] @@ -208,14 +207,14 @@ } else { options.subtitle = page.Part } - return []downloader.Data{bilibiliDownload(options)}, nil + return []*types.Data{bilibiliDownload(options, extractOption)}, nil } // handle normal video playlist // https://www.bilibili.com/video/av20827366/?p=1 - needDownloadItems := utils.NeedDownloadList(len(pageData.VideoData.Pages)) - extractedData := make([]downloader.Data, len(needDownloadItems)) - wgp := utils.NewWaitGroupPool(config.ThreadNumber) + needDownloadItems := utils.NeedDownloadList(extractOption.Items, extractOption.ItemStart, extractOption.ItemEnd, len(pageData.VideoData.Pages)) + extractedData := make([]*types.Data, len(needDownloadItems)) + wgp := utils.NewWaitGroupPool(extractOption.ThreadNumber) dataIndex := 0 for index, u := range pageData.VideoData.Pages { if !utils.ItemInSlice(index+1, needDownloadItems) { @@ -230,9 +229,9 @@ subtitle: u.Part, page: u.Page, } - go func(index int, options bilibiliOptions, extractedData []downloader.Data) { + go func(index int, options bilibiliOptions, extractedData []*types.Data) { defer wgp.Done() - extractedData[index] = bilibiliDownload(options) + extractedData[index] = bilibiliDownload(options, extractOption) }(dataIndex, options, extractedData) dataIndex++ } @@ -240,8 +239,15 @@ return extractedData, nil } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a bilibili extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
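+// Playlist selection (Playlist, Items, ItemStart, ItemEnd), ThreadNumber
+// and EpisodeTitleOnly are all read from the types.Options argument now
+// rather than from the global config package.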
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { var err error html, err := request.Get(url, referer, nil) if err != nil { @@ -249,14 +255,14 @@ func Extract(url string) ([]downloader.Data, error) { } if strings.Contains(url, "bangumi") { // handle bangumi - return extractBangumi(url, html) + return extractBangumi(url, html, option) } // handle normal video - return extractNormalVideo(url, html) + return extractNormalVideo(url, html, option) } // bilibiliDownload is the download function for a single URL -func bilibiliDownload(options bilibiliOptions) downloader.Data { +func bilibiliDownload(options bilibiliOptions, extractOption types.Options) *types.Data { var ( err error html string @@ -268,7 +274,7 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data { } else { html, err = request.Get(options.url, referer, nil) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } } if options.bangumi { @@ -278,34 +284,34 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data { // Get "accept_quality" and "accept_description" // "accept_description":["高清 1080P","高清 720P","清晰 480P","流畅 360P"], // "accept_quality":[80,48,32,16], - api, err := genAPI(options.aid, options.cid, options.bangumi, "15", seasonType) + api, err := genAPI(options.aid, options.cid, options.bangumi, "15", seasonType, options.cookie) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } jsonString, err := request.Get(api, referer, nil) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } var quality qualityInfo err = json.Unmarshal([]byte(jsonString), &quality) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } - streams := make(map[string]downloader.Stream, len(quality.Quality)) + streams := make(map[string]*types.Stream, len(quality.Quality)) for _, q := range quality.Quality { - apiURL, err := genAPI(options.aid, options.cid, options.bangumi, strconv.Itoa(q), seasonType) + apiURL, err := genAPI(options.aid, options.cid, options.bangumi, strconv.Itoa(q), seasonType, options.cookie) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } jsonString, err := request.Get(apiURL, referer, nil) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } var data bilibiliData err = json.Unmarshal([]byte(jsonString), &data) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } // Avoid duplicate streams @@ -313,9 +319,9 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data { continue } - urls, size := genURL(data.DURL) - streams[strconv.Itoa(data.Quality)] = downloader.Stream{ - URLs: urls, + parts, size := genParts(data.DURL) + streams[strconv.Itoa(data.Quality)] = &types.Stream{ + Parts: parts, Size: size, Quality: qualityString[data.Quality], } @@ -324,30 +330,26 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data { // get the title doc, err := parser.GetDoc(html) if err != nil { - return downloader.EmptyData(options.url, err) + return types.EmptyData(options.url, err) } title := parser.Title(doc) if options.subtitle != "" { - if config.EpisodeTitleOnly { + if extractOption.EpisodeTitleOnly { title = fmt.Sprintf("P%d %s", options.page, options.subtitle) } else { title = 
fmt.Sprintf("%s P%d %s", title, options.page, options.subtitle) } } - err = downloader.Caption( - fmt.Sprintf("https://comment.bilibili.com/%d.xml", options.cid), - options.url, title, "xml", - ) - if err != nil { - return downloader.EmptyData(options.url, err) - } - - return downloader.Data{ + return &types.Data{ Site: "哔哩哔哩 bilibili.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, - URL: options.url, + Caption: &types.Part{ + URL: fmt.Sprintf("https://comment.bilibili.com/%d.xml", options.cid), + Ext: "xml", + }, + URL: options.url, } } diff --git a/extractors/bilibili/bilibili_test.go b/extractors/bilibili/bilibili_test.go index a1b87fd05..6e6b90633 100644 --- a/extractors/bilibili/bilibili_test.go +++ b/extractors/bilibili/bilibili_test.go @@ -3,14 +3,11 @@ import ( "testing" - "github.com/iawia002/annie/config" - "github.com/iawia002/annie/downloader" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestBilibili(t *testing.T) { - config.InfoOnly = true - config.ThreadNumber = 9 // travis out of memory issue tests := []struct { name string args test.Args @@ -72,18 +69,18 @@ for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var ( - data []downloader.Data + data []*types.Data err error ) - if tt.playlist { // for playlist, we don't check the data - config.Playlist = true - _, err = Extract(tt.args.URL) + _, err = New().Extract(tt.args.URL, types.Options{ + Playlist: true, + ThreadNumber: 9, + }) test.CheckError(t, err) } else { - config.Playlist = false - data, err = Extract(tt.args.URL) + data, err = New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) } diff --git a/extractors/defs.go b/extractors/defs.go deleted file mode 100644 index 99777b8fc..000000000 --- a/extractors/defs.go +++ /dev/null @@ -1,8 +0,0 @@ -package extractors - -import ( - "errors" -) - -var ErrURLParseFailed = errors.New("url parse failed") -var ErrLoginRequired = errors.New("login required") diff --git a/extractors/douyin/douyin.go b/extractors/douyin/douyin.go index b92db9f51..911ee60d7 100644 --- a/extractors/douyin/douyin.go +++ b/extractors/douyin/douyin.go @@ -1,29 +1,39 @@ package douyin import ( - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "encoding/json" + "errors" + "fmt" + + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type data struct { + ItemList []struct { + Desc string `json:"desc"` + } `json:"item_list"` +} + +type extractor struct{} + +// New returns a douyin extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. +func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { var err error html, err := request.Get(url, url, nil) if err != nil { return nil, err } - var title string - desc := utils.MatchOneOf(html, `
<p class="desc">(.+?)</p>
`) - if desc != nil { - title = desc[1] - } else { - title = "抖音短视频" - } + realURLs := utils.MatchOneOf(html, `playAddr: "(.+?)"`) if realURLs == nil || len(realURLs) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } realURL := realURLs[1] @@ -31,22 +41,48 @@ if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "mp4", } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: []downloader.URL{urlData}, - Size: size, + Parts: []*types.Part{urlData}, + Size: size, }, } + + videoIDs := utils.MatchOneOf(url, `/video/(\d+)`) + if len(videoIDs) == 0 { + return nil, errors.New("unable to get video ID") + } + videoID := videoIDs[1] + + dytks := utils.MatchOneOf(html, `dytk: "(.+?)"`) + if len(dytks) == 0 { + return nil, errors.New("unable to get dytk info") + } + dytk := dytks[1] + + apiDataString, err := request.Get( + fmt.Sprintf("https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids=%s&dytk=%s", videoID, dytk), + url, nil, + ) + if err != nil { + return nil, err + } + + var apiData data + if err = json.Unmarshal([]byte(apiDataString), &apiData); err != nil { + return nil, err + } + + return []*types.Data{ { Site: "抖音 douyin.com", - Title: title, - Type: "video", + Title: apiData.ItemList[0].Desc, + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/douyin/douyin_test.go b/extractors/douyin/douyin_test.go index ca10121c3..e6f09690b 100644 --- a/extractors/douyin/douyin_test.go +++ b/extractors/douyin/douyin_test.go @@ -3,13 +3,11 @@ package douyin import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 100 tests := []struct { name string args test.Args @@ -24,7 +22,7 @@ } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) } } diff --git a/extractors/douyu/douyu.go b/extractors/douyu/douyu.go index 4f512fc13..2052d8eea 100644 --- a/extractors/douyu/douyu.go +++ b/extractors/douyu/douyu.go @@ -4,8 +4,7 @@ import ( "encoding/json" "errors" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -48,8 +47,15 @@ func douyuM3u8(url string) ([]douyuURLInfo, int64, error) { return data, totalSize, nil } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a douyu extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { var err error liveVid := utils.MatchOneOf(url, `https?://www.douyu.com/(\S+)`) if liveVid != nil { @@ -62,13 +68,13 @@ } titles := utils.MatchOneOf(html, `<title>(.*?)</title>`) if titles == nil || len(titles) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } title := titles[1] vids := utils.MatchOneOf(url, `https?://v.douyu.com/show/(\S+)`) if vids == nil || len(vids) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } vid := vids[1] @@ -76,33 +82,35 @@ if err != nil { return nil, err } - var dataDict douyuData - json.Unmarshal([]byte(dataString), &dataDict) + dataDict := new(douyuData) + if err := json.Unmarshal([]byte(dataString), dataDict); err != nil { + return nil, err + } m3u8URLs, totalSize, err := douyuM3u8(dataDict.Data.VideoURL) if err != nil { return nil, err } - urls := make([]downloader.URL, len(m3u8URLs)) + urls := make([]*types.Part, len(m3u8URLs)) for index, u := range m3u8URLs { - urls[index] = downloader.URL{ + urls[index] = &types.Part{ URL: u.URL, Size: u.Size, Ext: "ts", } } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: urls, - Size: totalSize, + Parts: urls, + Size: totalSize, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "斗鱼 douyu.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/douyu/douyu_test.go b/extractors/douyu/douyu_test.go index 49f65f73d..1d5190f96 100644 --- a/extractors/douyu/douyu_test.go +++ b/extractors/douyu/douyu_test.go @@ -3,13 +3,11 @@ package douyu import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -25,10 +23,7 @@ } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // data, err := Extract(tt.args.URL) - // test.CheckError(t, err) - // test.Check(t, tt.args, data[0]) - Extract(tt.args.URL) + New().Extract(tt.args.URL, types.Options{}) }) } } diff --git a/extractors/extractors.go b/extractors/extractors.go new file mode 100644 index 000000000..641bcda58 --- /dev/null +++ b/extractors/extractors.go @@ -0,0 +1,105 @@ +package extractors + +import ( + "net/url" + "strings" + + "github.com/iawia002/annie/extractors/bcy" + "github.com/iawia002/annie/extractors/bilibili" + "github.com/iawia002/annie/extractors/douyin" + "github.com/iawia002/annie/extractors/douyu" + "github.com/iawia002/annie/extractors/facebook" + "github.com/iawia002/annie/extractors/geekbang" + "github.com/iawia002/annie/extractors/instagram" + "github.com/iawia002/annie/extractors/iqiyi" + "github.com/iawia002/annie/extractors/mgtv" + "github.com/iawia002/annie/extractors/miaopai" + "github.com/iawia002/annie/extractors/netease" + "github.com/iawia002/annie/extractors/pixivision" + "github.com/iawia002/annie/extractors/pornhub" + "github.com/iawia002/annie/extractors/qq" + "github.com/iawia002/annie/extractors/tangdou" + "github.com/iawia002/annie/extractors/tiktok" + "github.com/iawia002/annie/extractors/tumblr" + "github.com/iawia002/annie/extractors/twitter" + "github.com/iawia002/annie/extractors/types" + 
"github.com/iawia002/annie/extractors/udn" + "github.com/iawia002/annie/extractors/universal" + "github.com/iawia002/annie/extractors/vimeo" + "github.com/iawia002/annie/extractors/weibo" + "github.com/iawia002/annie/extractors/xvideos" + "github.com/iawia002/annie/extractors/yinyuetai" + "github.com/iawia002/annie/extractors/youku" + "github.com/iawia002/annie/extractors/youtube" + "github.com/iawia002/annie/utils" +) + +var extractorMap map[string]types.Extractor + +func init() { + douyinExtractor := douyin.New() + youtubeExtractor := youtube.New() + + extractorMap = map[string]types.Extractor{ + "": universal.New(), // universal extractor + + "douyin": douyinExtractor, + "iesdouyin": douyinExtractor, + "bilibili": bilibili.New(), + "bcy": bcy.New(), + "pixivision": pixivision.New(), + "youku": youku.New(), + "youtube": youtubeExtractor, + "youtu": youtubeExtractor, // youtu.be + "iqiyi": iqiyi.New(), + "mgtv": mgtv.New(), + "tangdou": tangdou.New(), + "tumblr": tumblr.New(), + "vimeo": vimeo.New(), + "facebook": facebook.New(), + "douyu": douyu.New(), + "miaopai": miaopai.New(), + "163": netease.New(), + "weibo": weibo.New(), + "instagram": instagram.New(), + "twitter": twitter.New(), + "qq": qq.New(), + "yinyuetai": yinyuetai.New(), + "geekbang": geekbang.New(), + "pornhub": pornhub.New(), + "xvideos": xvideos.New(), + "udn": udn.New(), + "tiktok": tiktok.New(), + } +} + +// Extract is the main function to extract the data. +func Extract(u string, option types.Options) ([]*types.Data, error) { + u = strings.TrimSpace(u) + var domain string + + bilibiliShortLink := utils.MatchOneOf(u, `^(av|ep)\d+`) + if len(bilibiliShortLink) > 1 { + bilibiliURL := map[string]string{ + "av": "https://www.bilibili.com/video/", + "ep": "https://www.bilibili.com/bangumi/play/", + } + domain = "bilibili" + u = bilibiliURL[bilibiliShortLink[1]] + u + } else { + u, err := url.ParseRequestURI(u) + if err != nil { + return nil, err + } + domain = utils.Domain(u.Host) + } + extractor := extractorMap[domain] + videos, err := extractor.Extract(u, option) + if err != nil { + return nil, err + } + for _, v := range videos { + v.FillUpStreamsData() + } + return videos, nil +} diff --git a/extractors/facebook/facebook.go b/extractors/facebook/facebook.go index 737d2d7bc..2760145de 100644 --- a/extractors/facebook/facebook.go +++ b/extractors/facebook/facebook.go @@ -3,14 +3,20 @@ package facebook import ( "fmt" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a youtube extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. 
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { var err error html, err := request.Get(url, url, nil) if err != nil { @@ -18,11 +24,11 @@ } titles := utils.MatchOneOf(html, `<title id="pageTitle">(.+)</title>`) if titles == nil || len(titles) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } title := titles[1] - streams := map[string]downloader.Stream{} + streams := make(map[string]*types.Stream, 2) for _, quality := range []string{"sd", "hd"} { srcElement := utils.MatchOneOf( html, fmt.Sprintf(`%s_src_no_ratelimit:"(.+?)"`, quality), @@ -36,23 +42,23 @@ if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: u, Size: size, Ext: "mp4", } - streams[quality] = downloader.Stream{ - URLs: []downloader.URL{urlData}, + streams[quality] = &types.Stream{ + Parts: []*types.Part{urlData}, Size: size, Quality: quality, } } - return []downloader.Data{ + return []*types.Data{ { Site: "Facebook facebook.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/facebook/facebook_test.go b/extractors/facebook/facebook_test.go index c1c3b4b5a..44ec5a03c 100644 --- a/extractors/facebook/facebook_test.go +++ b/extractors/facebook/facebook_test.go @@ -3,12 +3,11 @@ package facebook import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true tests := []struct { name string args test.Args @@ -25,7 +24,7 @@ } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/geekbang/geekbang.go b/extractors/geekbang/geekbang.go index 2a2f5f460..61602b3b8 100644 --- a/extractors/geekbang/geekbang.go +++ b/extractors/geekbang/geekbang.go @@ -3,11 +3,11 @@ package geekbang import ( "encoding/json" "errors" + "fmt" "net/http" "strings" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -71,22 +71,29 @@ func geekM3u8(url string) ([]geekURLInfo, error) { return data, nil } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a geekbang extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
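+// The blank identifier in the signature below reflects that this
+// extractor reads nothing from the per-call options.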
+func (e *extractor) Extract(url string, _ types.Options) ([]*types.Data, error) { var err error matches := utils.MatchOneOf(url, `https?://time.geekbang.org/course/detail/(\d+)-(\d+)`) if matches == nil || len(matches) < 3 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } // Get video information heanders := map[string]string{"Origin": "https://time.geekbang.org", "Content-Type": "application/json", "Referer": url} - params := strings.NewReader("{\"id\":" + string(matches[2]) + "}") + params := strings.NewReader(fmt.Sprintf(`{"id": %q}`, matches[2])) res, err := request.Request(http.MethodPost, "https://time.geekbang.org/serv/v1/article", params, heanders) if err != nil { return nil, err } - defer res.Body.Close() + defer res.Body.Close() // nolint var data geekData if err = json.NewDecoder(res.Body).Decode(&data); err != nil { @@ -107,7 +114,7 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - defer res.Body.Close() + defer res.Body.Close() // nolint var playAuth videoPlayAuth if err = json.NewDecoder(res.Body).Decode(&playAuth); err != nil { @@ -124,7 +131,7 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - defer res.Body.Close() + defer res.Body.Close() // nolint var playInfo playInfo if err = json.NewDecoder(res.Body).Decode(&playInfo); err != nil { @@ -133,7 +140,7 @@ func Extract(url string) ([]downloader.Data, error) { title := data.Data.Title - streams := make(map[string]downloader.Stream, len(playInfo.PlayInfoList.PlayInfo)) + streams := make(map[string]*types.Stream, len(playInfo.PlayInfoList.PlayInfo)) for _, media := range playInfo.PlayInfoList.PlayInfo { m3u8URLs, err := geekM3u8(media.URL) @@ -142,27 +149,26 @@ func Extract(url string) ([]downloader.Data, error) { return nil, err } - urls := make([]downloader.URL, len(m3u8URLs)) + urls := make([]*types.Part, len(m3u8URLs)) for index, u := range m3u8URLs { - urls[index] = downloader.URL{ + urls[index] = &types.Part{ URL: u.URL, Size: u.Size, Ext: "ts", } } - streams[media.Definition] = downloader.Stream{ - URLs: urls, - Size: media.Size, - Quality: media.Definition, + streams[media.Definition] = &types.Stream{ + Parts: urls, + Size: media.Size, } } - return []downloader.Data{ + return []*types.Data{ { Site: "极客时间 geekbang.org", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/geekbang/geekbang_test.go b/extractors/geekbang/geekbang_test.go index fef1c1558..cc5dd01f2 100644 --- a/extractors/geekbang/geekbang_test.go +++ b/extractors/geekbang/geekbang_test.go @@ -3,13 +3,11 @@ package geekbang import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -25,7 +23,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/instagram/instagram.go b/extractors/instagram/instagram.go index a8e46c396..a0ea67572 100644 --- a/extractors/instagram/instagram.go +++ b/extractors/instagram/instagram.go @@ -3,8 +3,7 @@ package instagram import ( "encoding/json" - "github.com/iawia002/annie/downloader" - 
"github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/parser" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" @@ -30,8 +29,15 @@ type instagram struct { } `json:"entry_data"` } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a youtube extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. +func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { html, err := request.Get(url, url, nil) if err != nil { return nil, err @@ -45,29 +51,32 @@ func Extract(url string) ([]downloader.Data, error) { dataStrings := utils.MatchOneOf(html, `window\._sharedData\s*=\s*(.*);`) if dataStrings == nil || len(dataStrings) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } dataString := dataStrings[1] var data instagram if err = json.Unmarshal([]byte(dataString), &data); err != nil { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } - var realURL, dataType string - var size int64 - streams := map[string]downloader.Stream{} + var ( + realURL string + dataType types.DataType + size int64 + ) + streams := make(map[string]*types.Stream) if data.EntryData.PostPage[0].Graphql.ShortcodeMedia.VideoURL != "" { - // Data - dataType = "video" + // Video + dataType = types.DataTypeVideo realURL = data.EntryData.PostPage[0].Graphql.ShortcodeMedia.VideoURL size, err = request.Size(realURL, url) if err != nil { return nil, err } - streams["default"] = downloader.Stream{ - URLs: []downloader.URL{ + streams["default"] = &types.Stream{ + Parts: []*types.Part{ { URL: realURL, Size: size, @@ -78,7 +87,7 @@ func Extract(url string) ([]downloader.Data, error) { } } else { // Image - dataType = "image" + dataType = types.DataTypeImage if data.EntryData.PostPage[0].Graphql.ShortcodeMedia.EdgeSidecar.Edges == nil { // Single realURL = data.EntryData.PostPage[0].Graphql.ShortcodeMedia.DisplayURL @@ -86,8 +95,8 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - streams["default"] = downloader.Stream{ - URLs: []downloader.URL{ + streams["default"] = &types.Stream{ + Parts: []*types.Part{ { URL: realURL, Size: size, @@ -99,14 +108,14 @@ func Extract(url string) ([]downloader.Data, error) { } else { // Album var totalSize int64 - var urls []downloader.URL + var urls []*types.Part for _, u := range data.EntryData.PostPage[0].Graphql.ShortcodeMedia.EdgeSidecar.Edges { realURL = u.Node.DisplayURL size, err = request.Size(realURL, url) if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "jpg", @@ -114,14 +123,14 @@ func Extract(url string) ([]downloader.Data, error) { urls = append(urls, urlData) totalSize += size } - streams["default"] = downloader.Stream{ - URLs: urls, - Size: totalSize, + streams["default"] = &types.Stream{ + Parts: urls, + Size: totalSize, } } } - return []downloader.Data{ + return []*types.Data{ { Site: "Instagram instagram.com", Title: title, diff --git a/extractors/instagram/instagram_test.go b/extractors/instagram/instagram_test.go index 1ce4ac662..1d508a35a 100644 --- a/extractors/instagram/instagram_test.go +++ b/extractors/instagram/instagram_test.go @@ -3,12 +3,11 @@ package instagram import ( "testing" - "github.com/iawia002/annie/config" + 
"github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true tests := []struct { name string args test.Args @@ -40,7 +39,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/iqiyi/iqiyi.go b/extractors/iqiyi/iqiyi.go index 7a3ddd6e5..ca83f3bb7 100644 --- a/extractors/iqiyi/iqiyi.go +++ b/extractors/iqiyi/iqiyi.go @@ -8,8 +8,7 @@ import ( "strings" "time" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/parser" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" @@ -74,7 +73,7 @@ func getVF(params string) string { return utils.Md5(params) } -func getVPS(tvid, vid string) (iqiyi, error) { +func getVPS(tvid, vid string) (*iqiyi, error) { t := time.Now().Unix() * 1000 host := "http://cache.video.qiyi.com" params := fmt.Sprintf( @@ -85,15 +84,24 @@ func getVPS(tvid, vid string) (iqiyi, error) { apiURL := fmt.Sprintf("%s%s&vf=%s", host, params, vf) info, err := request.Get(apiURL, iqiyiReferer, nil) if err != nil { - return iqiyi{}, err + return nil, err + } + data := new(iqiyi) + if err := json.Unmarshal([]byte(info), data); err != nil { + return nil, err } - var data iqiyi - json.Unmarshal([]byte(info), &data) return data, nil } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a youtube extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. 
+func (e *extractor) Extract(url string, _ types.Options) ([]*types.Data, error) { html, err := request.Get(url, iqiyiReferer, nil) if err != nil { return nil, err @@ -112,7 +120,7 @@ func Extract(url string) ([]downloader.Data, error) { ) } if tvid == nil || len(tvid) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } vid := utils.MatchOneOf( @@ -129,7 +137,7 @@ func Extract(url string) ([]downloader.Data, error) { ) } if vid == nil || len(vid) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } doc, err := parser.GetDoc(html) @@ -155,10 +163,11 @@ func Extract(url string) ([]downloader.Data, error) { if videoDatas.Code != "A00000" { return nil, fmt.Errorf("can't play this video: %s", videoDatas.Msg) } - streams := map[string]downloader.Stream{} + + streams := make(map[string]*types.Stream) urlPrefix := videoDatas.Data.VP.Du for _, video := range videoDatas.Data.VP.Tkl[0].Vs { - urls := make([]downloader.URL, len(video.Fs)) + urls := make([]*types.Part, len(video.Fs)) for index, v := range video.Fs { realURLData, err := request.Get(urlPrefix+v.L, iqiyiReferer, nil) if err != nil { @@ -172,24 +181,24 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urls[index] = downloader.URL{ + urls[index] = &types.Part{ URL: realURL.L, Size: v.B, Ext: ext, } } - streams[strconv.Itoa(video.Bid)] = downloader.Stream{ - URLs: urls, + streams[strconv.Itoa(video.Bid)] = &types.Stream{ + Parts: urls, Size: video.Vsize, Quality: video.Scrsz, } } - return []downloader.Data{ + return []*types.Data{ { Site: "爱奇艺 iqiyi.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/iqiyi/iqiyi_test.go b/extractors/iqiyi/iqiyi_test.go index d2a5bcc1a..41b1573ab 100644 --- a/extractors/iqiyi/iqiyi_test.go +++ b/extractors/iqiyi/iqiyi_test.go @@ -3,13 +3,11 @@ package iqiyi import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -44,7 +42,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/mgtv/mgtv.go b/extractors/mgtv/mgtv.go index 8f2051a4c..698458a30 100644 --- a/extractors/mgtv/mgtv.go +++ b/extractors/mgtv/mgtv.go @@ -9,8 +9,7 @@ import ( "strings" "time" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -95,8 +94,15 @@ func encodeTk2(str string) string { return encodeString } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a mgtv extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { html, err := request.Get(url, url, nil) if err != nil { return nil, err @@ -110,7 +116,7 @@ func Extract(url string) ([]downloader.Data, error) { vid = utils.MatchOneOf(html, `vid: (\d+),`) } if vid == nil || len(vid) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } // API extract from https://js.mgtv.com/imgotv-miniv6/global/page/play-tv.js @@ -160,7 +166,7 @@ func Extract(url string) ([]downloader.Data, error) { ) mgtvStreams := mgtvData.Data.Stream var addr mgtvVideoAddr - streams := map[string]downloader.Stream{} + streams := make(map[string]*types.Stream) for _, stream := range mgtvStreams { if stream.URL == "" { continue @@ -179,26 +185,26 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urls := make([]downloader.URL, len(m3u8URLs)) + urls := make([]*types.Part, len(m3u8URLs)) for index, u := range m3u8URLs { - urls[index] = downloader.URL{ + urls[index] = &types.Part{ URL: u.URL, Size: u.Size, Ext: "ts", } } - streams[stream.Def] = downloader.Stream{ - URLs: urls, + streams[stream.Def] = &types.Stream{ + Parts: urls, Size: totalSize, Quality: stream.Name, } } - return []downloader.Data{ + return []*types.Data{ { Site: "芒果TV mgtv.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/mgtv/mgtv_test.go b/extractors/mgtv/mgtv_test.go index c41eae5d9..566a81c40 100644 --- a/extractors/mgtv/mgtv_test.go +++ b/extractors/mgtv/mgtv_test.go @@ -3,13 +3,11 @@ package mgtv import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 20 tests := []struct { name string args test.Args @@ -44,10 +42,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - Extract(tt.args.URL) - // data, err := Extract(tt.args.URL) - // test.CheckError(t, err) - // test.Check(t, tt.args, data[0]) + New().Extract(tt.args.URL, types.Options{}) }) } } diff --git a/extractors/miaopai/miaopai.go b/extractors/miaopai/miaopai.go index 58efd6e34..8dfffb940 100644 --- a/extractors/miaopai/miaopai.go +++ b/extractors/miaopai/miaopai.go @@ -8,8 +8,7 @@ import ( "strings" "time" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -39,11 +38,18 @@ func getRandomString(l int) string { return strings.Join(s, "") } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a miaopai extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { ids := utils.MatchOneOf(url, `/media/([^\./]+)`, `/show(?:/channel)?/([^\./]+)`) if ids == nil || len(ids) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } id := ids[1] @@ -73,23 +79,23 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "mp4", } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: []downloader.URL{urlData}, - Size: size, + Parts: []*types.Part{urlData}, + Size: size, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "秒拍 miaopai.com", Title: data.Data.Description, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/miaopai/miaopai_test.go b/extractors/miaopai/miaopai_test.go index b1163d384..841381d55 100644 --- a/extractors/miaopai/miaopai_test.go +++ b/extractors/miaopai/miaopai_test.go @@ -3,13 +3,11 @@ package miaopai import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -25,7 +23,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/netease/netease.go b/extractors/netease/netease.go index ea78b7210..384ed73be 100644 --- a/extractors/netease/netease.go +++ b/extractors/netease/netease.go @@ -5,14 +5,20 @@ import ( netURL "net/url" "strings" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a netease extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { url = strings.Replace(url, "/#/", "/", 1) vid := utils.MatchOneOf(url, `/(mv|video)\?id=(\w+)`) if vid == nil { @@ -29,13 +35,13 @@ func Extract(url string) ([]downloader.Data, error) { titles := utils.MatchOneOf(html, ``) if titles == nil || len(titles) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } title := titles[1] realURLs := utils.MatchOneOf(html, ``) if realURLs == nil || len(realURLs) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } realURL, _ := netURL.QueryUnescape(realURLs[1]) @@ -43,22 +49,22 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "mp4", } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: []downloader.URL{urlData}, - Size: size, + Parts: []*types.Part{urlData}, + Size: size, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "网易云音乐 music.163.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/netease/netease_test.go b/extractors/netease/netease_test.go index 5f3914e71..c897bc43f 100644 --- a/extractors/netease/netease_test.go +++ b/extractors/netease/netease_test.go @@ -3,12 +3,11 @@ package netease import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true tests := []struct { name string args test.Args @@ -32,7 +31,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/pixivision/pixivision.go b/extractors/pixivision/pixivision.go index 846152648..b9d451a3d 100644 --- a/extractors/pixivision/pixivision.go +++ b/extractors/pixivision/pixivision.go @@ -1,33 +1,58 @@ package pixivision import ( - "github.com/iawia002/annie/downloader" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/parser" "github.com/iawia002/annie/request" + "github.com/iawia002/annie/utils" ) -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a pixivision extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { html, err := request.Get(url, url, nil) if err != nil { return nil, err } - title, urls, err := parser.GetImages(url, html, "am__work__illust ", nil) + title, urls, err := parser.GetImages(html, "am__work__illust ", nil) if err != nil { return nil, err } - streams := map[string]downloader.Stream{ + + parts := make([]*types.Part, 0, len(urls)) + for _, u := range urls { + _, ext, err := utils.GetNameAndExt(u) + if err != nil { + return nil, err + } + size, err := request.Size(u, url) + if err != nil { + return nil, err + } + parts = append(parts, &types.Part{ + URL: u, + Size: size, + Ext: ext, + }) + } + + streams := map[string]*types.Stream{ "default": { - URLs: urls, - Size: 0, + Parts: parts, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "pixivision pixivision.net", Title: title, - Type: "image", + Type: types.DataTypeImage, Streams: streams, URL: url, }, diff --git a/extractors/pixivision/pixivision_test.go b/extractors/pixivision/pixivision_test.go index f278503b7..0ead03047 100644 --- a/extractors/pixivision/pixivision_test.go +++ b/extractors/pixivision/pixivision_test.go @@ -3,13 +3,11 @@ package pixivision import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 100 tests := []struct { name string args test.Args @@ -24,7 +22,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/pornhub/pornhub.go b/extractors/pornhub/pornhub.go index 7db8e1b40..094c4d457 100644 --- a/extractors/pornhub/pornhub.go +++ b/extractors/pornhub/pornhub.go @@ -5,8 +5,7 @@ import ( "encoding/json" "fmt" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -17,8 +16,15 @@ type pornhubData struct { VideoURL string `json:"videoUrl"` } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a pornhub extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { html, err := request.Get(url, url, nil) if err != nil { return nil, err @@ -26,7 +32,7 @@ func Extract(url string) ([]downloader.Data, error) { var title string desc := utils.MatchOneOf(html, `(.+?)`) - if desc != nil && len(desc) > 1 { + if len(desc) > 1 { title = desc[1] } else { title = "pornhub video" @@ -34,7 +40,7 @@ func Extract(url string) ([]downloader.Data, error) { realURLs := utils.MatchOneOf(html, `"mediaDefinitions":(.+?),"isVertical"`) if realURLs == nil || len(realURLs) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } var pornhubs []pornhubData @@ -42,7 +48,7 @@ func Extract(url string) ([]downloader.Data, error) { return nil, err } - streams := make(map[string]downloader.Stream, len(pornhubs)) + streams := make(map[string]*types.Stream, len(pornhubs)) for _, data := range pornhubs { if data.Format == "hls" { continue @@ -67,23 +73,23 @@ func Extract(url string) ([]downloader.Data, error) { if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "mp4", } - streams[quality] = downloader.Stream{ - URLs: []downloader.URL{urlData}, + streams[quality] = &types.Stream{ + Parts: []*types.Part{urlData}, Size: size, Quality: fmt.Sprintf("%sP", quality), } } - return []downloader.Data{ + return []*types.Data{ { Site: "Pornhub pornhub.com", Title: title, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/pornhub/pornhub_test.go b/extractors/pornhub/pornhub_test.go index 6adf5325c..1798c487f 100644 --- a/extractors/pornhub/pornhub_test.go +++ b/extractors/pornhub/pornhub_test.go @@ -3,13 +3,11 @@ package pornhub import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestPornhub(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -24,7 +22,7 @@ func TestPornhub(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - Extract(tt.args.URL) + New().Extract(tt.args.URL, types.Options{}) }) } } diff --git a/extractors/qq/qq.go b/extractors/qq/qq.go index c8588dbcb..7cecef4c3 100644 --- a/extractors/qq/qq.go +++ b/extractors/qq/qq.go @@ -7,8 +7,7 @@ import ( "strconv" "strings" - "github.com/iawia002/annie/downloader" - "github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) @@ -49,8 +48,8 @@ type qqKeyInfo struct { const qqPlayerVersion string = "3.2.19.333" -func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream, error) { - streams := map[string]downloader.Stream{} +func genStreams(vid, cdn string, data qqVideoInfo) (map[string]*types.Stream, error) { + streams := make(map[string]*types.Stream) var vkey string // number of fragments clips := data.Vl.Vi[0].Cl.Fc @@ -81,7 +80,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream fns = append(fns[:1], fns[2:]...) 
} - var urls []downloader.URL + var urls []*types.Part var totalSize int64 var filename string for part := 1; part < clips+1; part++ { @@ -107,7 +106,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream } jsonStrings := utils.MatchOneOf(html, `QZOutputJson=(.+);$`) if jsonStrings == nil || len(jsonStrings) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } jsonString := jsonStrings[1] @@ -125,7 +124,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream if err != nil { return nil, err } - urlData := downloader.URL{ + urlData := &types.Part{ URL: realURL, Size: size, Ext: "mp4", @@ -133,8 +132,8 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream urls = append(urls, urlData) totalSize += size } - streams[fi.Name] = downloader.Stream{ - URLs: urls, + streams[fi.Name] = &types.Stream{ + Parts: urls, Size: totalSize, Quality: fi.Cname, } @@ -142,11 +141,18 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream return streams, nil } -// Extract is the main function for extracting data -func Extract(url string) ([]downloader.Data, error) { +type extractor struct{} + +// New returns a qq extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. +func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { vids := utils.MatchOneOf(url, `vid=(\w+)`, `/(\w+)\.html`) if vids == nil || len(vids) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } vid := vids[1] @@ -160,7 +166,7 @@ func Extract(url string) ([]downloader.Data, error) { u, `vid=(\w+)`, `vid:\s*["'](\w+)`, `vid\s*=\s*["']\s*(\w+)`, ) if vids == nil || len(vids) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } vid = vids[1] } @@ -175,7 +181,7 @@ func Extract(url string) ([]downloader.Data, error) { } jsonStrings := utils.MatchOneOf(html, `QZOutputJson=(.+);$`) if jsonStrings == nil || len(jsonStrings) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } jsonString := jsonStrings[1] @@ -194,11 +200,11 @@ func Extract(url string) ([]downloader.Data, error) { return nil, err } - return []downloader.Data{ + return []*types.Data{ { Site: "腾讯视频 v.qq.com", Title: data.Vl.Vi[0].Ti, - Type: "video", + Type: types.DataTypeVideo, Streams: streams, URL: url, }, diff --git a/extractors/qq/qq_test.go b/extractors/qq/qq_test.go index 139e8a712..b785049e8 100644 --- a/extractors/qq/qq_test.go +++ b/extractors/qq/qq_test.go @@ -3,13 +3,11 @@ package qq import ( "testing" - "github.com/iawia002/annie/config" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/test" ) func TestDownload(t *testing.T) { - config.InfoOnly = true - config.RetryTimes = 10 tests := []struct { name string args test.Args @@ -44,7 +42,7 @@ func TestDownload(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - data, err := Extract(tt.args.URL) + data, err := New().Extract(tt.args.URL, types.Options{}) test.CheckError(t, err) test.Check(t, tt.args, data[0]) }) diff --git a/extractors/tangdou/tangdou.go b/extractors/tangdou/tangdou.go index a2a71b7e6..5c9c084ea 100644 --- a/extractors/tangdou/tangdou.go +++ b/extractors/tangdou/tangdou.go @@ -1,35 +1,42 @@ package tangdou import ( - "github.com/iawia002/annie/config" - "github.com/iawia002/annie/downloader" -
"github.com/iawia002/annie/extractors" + "github.com/iawia002/annie/extractors/types" "github.com/iawia002/annie/request" "github.com/iawia002/annie/utils" ) const referer = "http://www.tangdou.com/html/playlist/view/4173" -// Extract is the main function for extracting data -func Extract(uri string) ([]downloader.Data, error) { - if !config.Playlist { - return []downloader.Data{tangdouDownload(uri)}, nil +type extractor struct{} + +// New returns a youtube extractor. +func New() types.Extractor { + return &extractor{} +} + +// Extract is the main function to extract the data. +func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) { + if !option.Playlist { + return []*types.Data{tangdouDownload(url)}, nil } - html, err := request.Get(uri, referer, nil) + + html, err := request.Get(url, referer, nil) if err != nil { return nil, err } + videoIDs := utils.MatchAll(html, ``) - needDownloadItems := utils.NeedDownloadList(len(videoIDs)) - extractedData := make([]downloader.Data, len(needDownloadItems)) - wgp := utils.NewWaitGroupPool(config.ThreadNumber) + needDownloadItems := utils.NeedDownloadList(option.Items, option.ItemStart, option.ItemEnd, len(videoIDs)) + extractedData := make([]*types.Data, len(needDownloadItems)) + wgp := utils.NewWaitGroupPool(option.ThreadNumber) dataIndex := 0 for index, videoID := range videoIDs { if !utils.ItemInSlice(index+1, needDownloadItems) || len(videoID) < 2 { continue } wgp.Add() - go func(index int, videURI string, extractedData []downloader.Data) { + go func(index int, videURI string, extractedData []*types.Data) { defer wgp.Done() extractedData[index] = tangdouDownload(videURI) }(dataIndex, videoID[1], extractedData) @@ -40,17 +47,17 @@ func Extract(uri string) ([]downloader.Data, error) { } // tangdouDownload download function for single url -func tangdouDownload(uri string) downloader.Data { +func tangdouDownload(uri string) *types.Data { html, err := request.Get(uri, referer, nil) if err != nil { - return downloader.EmptyData(uri, err) + return types.EmptyData(uri, err) } titles := utils.MatchOneOf( html, `
(.+?)
`, `(.+?)`, ) if titles == nil || len(titles) < 2 { - return downloader.EmptyData(uri, extractors.ErrURLParseFailed) + return types.EmptyData(uri, types.ErrURLParseFailed) } title := titles[1] @@ -63,37 +70,37 @@ func tangdouDownload(uri string) downloader.Data { html, `
\s*`, ) if jsonStrings == nil || len(jsonStrings) < 2 { - return nil, extractors.ErrURLParseFailed + return nil, types.ErrURLParseFailed } jsonString := jsonStrings[1] var totalSize int64 - var urls []downloader.URL + urls := make([]*types.Part, 0, 1) if strings.Contains(jsonString, `"image":{"@list"`) { // there are two data structures in the same field(image) var imageList tumblrImageList @@ -79,28 +77,28 @@ func tumblrImageDownload(url, html, title string) ([]downloader.Data, error) { totalSize = size urls = append(urls, urlData) } - streams := map[string]downloader.Stream{ + streams := map[string]*types.Stream{ "default": { - URLs: urls, - Size: totalSize, + Parts: urls, + Size: totalSize, }, } - return []downloader.Data{ + return []*types.Data{ { Site: "Tumblr tumblr.com", Title: title, - Type: "image", + Type: types.DataTypeImage, Streams: streams, URL: url, }, }, nil } -func tumblrVideoDownload(url, html, title string) ([]downloader.Data, error) { +func tumblrVideoDownload(url, html, title string) ([]*types.Data, error) { videoURLs := utils.MatchOneOf(html, `
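
Taken together, these hunks show the shape of the refactor: each extractor hides its state behind an unexported `extractor` struct, exposes a `New()` constructor returning the shared `types.Extractor` interface, and receives per-call settings through `types.Options` instead of the old `config` globals. Below is a minimal caller sketch based only on the identifiers visible in these hunks (`New`, `Extract`, `types.Options`, and the `Data`/`Stream`/`Part` fields); the facebook URL is a placeholder, not a tested input:

```go
package main

import (
	"fmt"
	"log"

	"github.com/iawia002/annie/extractors/facebook"
	"github.com/iawia002/annie/extractors/types"
)

func main() {
	// New() returns a types.Extractor; options travel with the call
	// instead of living in package-level config variables.
	var e types.Extractor = facebook.New()

	data, err := e.Extract("https://www.facebook.com/...", types.Options{})
	if err != nil {
		log.Fatal(err)
	}

	for _, d := range data {
		fmt.Println(d.Site, d.Title, d.Type)
		for id, stream := range d.Streams {
			// Stream.URLs was renamed to Stream.Parts; each Part carries
			// its own URL, Size, and Ext.
			fmt.Printf("  %s (%s): %d part(s), %d bytes\n",
				id, stream.Quality, len(stream.Parts), stream.Size)
		}
	}
}
```

One design consequence visible in the test hunks: because tests now call `New().Extract(url, types.Options{})`, the per-test mutation of globals (`config.InfoOnly`, `config.RetryTimes`) disappears, so extractors can run concurrently without sharing state.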