diff --git a/.golangci.yml b/.golangci.yml
new file mode 100644
index 000000000..856e932cb
--- /dev/null
+++ b/.golangci.yml
@@ -0,0 +1,24 @@
+run:
+ concurrency: 2
+ timeout: 5m
+
+linters-settings:
+ goconst:
+ min-len: 2
+ min-occurrences: 2
+
+linters:
+ enable:
+ - golint
+ - goconst
+ - gofmt
+ - goimports
+ - misspell
+ - unparam
+
+issues:
+ exclude-use-default: false
+ exclude-rules:
+ - path: _test.go
+ linters:
+ - errcheck
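
For reference, with min-len and min-occurrences both set to 2, goconst flags any string literal of two or more characters that appears twice in a package; a minimal hypothetical sketch of what it catches:

package main

import "fmt"

func main() {
    // goconst would report the repeated "mp4" literal below and
    // suggest extracting it into a constant.
    fmt.Println("merging parts to " + "mp4")
    fmt.Println("default container: " + "mp4")
}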
diff --git a/.travis.yml b/.travis.yml
index 11aa9f3d4..24d13c182 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,11 @@ language: go
go:
- "1.13.x"
+before_install:
+ - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin v1.24.0
+
script:
+ - golangci-lint run
- ./go.test.sh
after_success:
diff --git a/config/config.go b/config/config.go
index 9522db9c2..61e3d82b0 100644
--- a/config/config.go
+++ b/config/config.go
@@ -1,62 +1,5 @@
package config
-var (
- // Debug debug mode
- Debug bool
- // Version show version
- Version bool
- // InfoOnly Information only mode
- InfoOnly bool
- // Cookie http cookies
- Cookie string
- // Playlist download playlist
- Playlist bool
- // Refer use specified Referrer
- Refer string
- // Stream select specified stream to download
- Stream string
- // OutputPath output file path
- OutputPath string
- // OutputName output file name
- OutputName string
- // ExtractedData print extracted data
- ExtractedData bool
- // ChunkSizeMB HTTP chunk size for downloading (in MB)
- ChunkSizeMB int
- // UseAria2RPC Use Aria2 RPC to download
- UseAria2RPC bool
- // Aria2Token Aria2 RPC Token
- Aria2Token string
- // Aria2Addr Aria2 Address (default "localhost:6800")
- Aria2Addr string
- // Aria2Method Aria2 Method (default "http")
- Aria2Method string
- // ThreadNumber The number of download thread (only works for multiple-parts video)
- ThreadNumber int
- // File URLs file path
- File string
- // ItemStart Define the starting item of a playlist or a file input
- ItemStart int
- // ItemEnd Define the ending item of a playlist or a file input
- ItemEnd int
- // Items Define wanted items from a file or playlist. Separated by commas like: 1,5,6,8-10
- Items string
- // File name of each bilibili episode doesn't include the playlist title
- EpisodeTitleOnly bool
- // Caption download captions
- Caption bool
- // YoukuCcode youku ccode
- YoukuCcode string
- // YoukuCkey youku ckey
- YoukuCkey string
- // YoukuPassword youku password
- YoukuPassword string
- // RetryTimes how many times to retry when the download failed
- RetryTimes int
-
- MultiThread bool
-)
-
// FakeHeaders fake http headers
var FakeHeaders = map[string]string{
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
diff --git a/downloader/downloader.go b/downloader/downloader.go
index 383274fb6..6bc2a49f0 100644
--- a/downloader/downloader.go
+++ b/downloader/downloader.go
@@ -17,11 +17,38 @@ import (
"github.com/cheggaaa/pb"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
+// Options defines options used in downloading.
+type Options struct {
+ InfoOnly bool
+ Stream string
+ Refer string
+ OutputPath string
+ OutputName string
+ FileNameLength int
+ Caption bool
+
+ MultiThread bool
+ ThreadNumber int
+ RetryTimes int
+ ChunkSizeMB int
+ // Aria2
+ UseAria2RPC bool
+ Aria2Token string
+ Aria2Method string
+ Aria2Addr string
+}
+
+// Downloader is the default downloader.
+type Downloader struct {
+ bar *pb.ProgressBar
+ option Options
+}
+
func progressBar(size int64) *pb.ProgressBar {
bar := pb.New64(size).SetUnits(pb.U_BYTES).SetRefreshRate(time.Millisecond * 10)
bar.ShowSpeed = true
@@ -30,17 +57,27 @@ func progressBar(size int64) *pb.ProgressBar {
return bar
}
-// Caption download danmaku, subtitles, etc
-func Caption(url, refer, fileName, ext string) error {
- if !config.Caption || config.InfoOnly {
- return nil
+// New returns a new Downloader implementation.
+func New(option Options) *Downloader {
+ downloader := &Downloader{
+ option: option,
}
+ return downloader
+}
+
+// caption downloads danmaku, subtitles, etc.
+func (downloader *Downloader) caption(url, fileName, ext string) error {
fmt.Println("\nDownloading captions...")
+
+ refer := downloader.option.Refer
+ if refer == "" {
+ refer = url
+ }
body, err := request.GetByte(url, refer, nil)
if err != nil {
return err
}
- filePath, err := utils.FilePath(fileName, ext, true)
+ filePath, err := utils.FilePath(fileName, ext, downloader.option.FileNameLength, downloader.option.OutputPath, true)
if err != nil {
return err
}
@@ -48,7 +85,7 @@ func Caption(url, refer, fileName, ext string) error {
if fileError != nil {
return fileError
}
- defer file.Close()
+ defer file.Close() // nolint
if _, err = file.Write(body); err != nil {
return err
@@ -56,16 +93,14 @@ func Caption(url, refer, fileName, ext string) error {
return nil
}
-func writeFile(
- url string, file *os.File, headers map[string]string, bar *pb.ProgressBar,
-) (int64, error) {
+func (downloader *Downloader) writeFile(url string, file *os.File, headers map[string]string) (int64, error) {
res, err := request.Request(http.MethodGet, url, nil, headers)
if err != nil {
return 0, err
}
- defer res.Body.Close()
+ defer res.Body.Close() // nolint
- writer := io.MultiWriter(file, bar)
+ writer := io.MultiWriter(file, downloader.bar)
// Note that io.Copy reads 32kb(maximum) from input and writes them to output, then repeats.
// So don't worry about memory.
written, copyErr := io.Copy(writer, res.Body)
@@ -75,12 +110,8 @@ func writeFile(
return written, nil
}
-// Save save url file
-func Save(
- urlData URL, refer, fileName string, bar *pb.ProgressBar, chunkSizeMB int,
-) error {
- var err error
- filePath, err := utils.FilePath(fileName, urlData.Ext, false)
+func (downloader *Downloader) save(part *types.Part, fileName string) error {
+ filePath, err := utils.FilePath(fileName, part.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false)
if err != nil {
return err
}
@@ -88,24 +119,19 @@ func Save(
if err != nil {
return err
}
- if bar == nil {
- bar = progressBar(urlData.Size)
- bar.Start()
- }
// Skip segment file
// TODO: Live video URLs will not return the size
- if exists && fileSize == urlData.Size {
- bar.Add64(fileSize)
+ if exists && fileSize == part.Size {
+ downloader.bar.Add64(fileSize)
return nil
}
+
tempFilePath := filePath + ".download"
tempFileSize, _, err := utils.FileSize(tempFilePath)
if err != nil {
return err
}
- headers := map[string]string{
- "Referer": refer,
- }
+ headers := make(map[string]string, 1)
var (
file *os.File
fileError error
@@ -114,7 +140,7 @@ func Save(
// range start from 0, 0-1023 means the first 1024 bytes of the file
headers["Range"] = fmt.Sprintf("bytes=%d-", tempFileSize)
file, fileError = os.OpenFile(tempFilePath, os.O_APPEND|os.O_WRONLY, 0644)
- bar.Add64(tempFileSize)
+ downloader.bar.Add64(tempFileSize)
} else {
file, fileError = os.Create(tempFilePath)
}
@@ -126,16 +152,16 @@ func Save(
defer func() {
// must close the file before rename or it will cause
// `The process cannot access the file because it is being used by another process.` error.
- file.Close()
+ file.Close() // nolint
if err == nil {
- os.Rename(tempFilePath, filePath)
+ os.Rename(tempFilePath, filePath) // nolint
}
}()
- if chunkSizeMB > 0 {
+ if downloader.option.ChunkSizeMB > 0 {
var start, end, chunkSize int64
- chunkSize = int64(chunkSizeMB) * 1024 * 1024
- remainingSize := urlData.Size
+ chunkSize = int64(downloader.option.ChunkSizeMB) * 1024 * 1024
+ remainingSize := part.Size
if tempFileSize > 0 {
start = tempFileSize
remainingSize -= tempFileSize
@@ -150,10 +176,10 @@ func Save(
headers["Range"] = fmt.Sprintf("bytes=%d-%d", start, end)
temp := start
for i := 0; ; i++ {
- written, err := writeFile(urlData.URL, file, headers, bar)
+ written, err := downloader.writeFile(part.URL, file, headers)
if err == nil {
break
- } else if i+1 >= config.RetryTimes {
+ } else if i+1 >= downloader.option.RetryTimes {
return err
}
temp += written
@@ -165,10 +191,10 @@ func Save(
} else {
temp := tempFileSize
for i := 0; ; i++ {
- written, err := writeFile(urlData.URL, file, headers, bar)
+ written, err := downloader.writeFile(part.URL, file, headers)
if err == nil {
break
- } else if i+1 >= config.RetryTimes {
+ } else if i+1 >= downloader.option.RetryTimes {
return err
}
temp += written
@@ -180,10 +206,8 @@ func Save(
return nil
}
-func MultiThreadSave(
- urlData URL, refer, fileName string, bar *pb.ProgressBar, chunkSizeMB, threadNum int,
-) error {
- filePath, err := utils.FilePath(fileName, urlData.Ext, false)
+func (downloader *Downloader) multiThreadSave(dataPart *types.Part, fileName string) error {
+ filePath, err := utils.FilePath(fileName, dataPart.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false)
if err != nil {
return err
}
@@ -191,14 +215,11 @@ func MultiThreadSave(
if err != nil {
return err
}
- if bar == nil {
- bar = progressBar(urlData.Size)
- bar.Start()
- }
+
// Skip segment file
// TODO: Live video URLs will not return the size
- if exists && fileSize == urlData.Size {
- bar.Add64(fileSize)
+ if exists && fileSize == dataPart.Size {
+ downloader.bar.Add64(fileSize)
return nil
}
tmpFilePath := filePath + ".download"
@@ -207,19 +228,18 @@ func MultiThreadSave(
return err
}
if tmpExists {
- if tmpFileSize == urlData.Size {
- bar.Add64(urlData.Size)
+ if tmpFileSize == dataPart.Size {
+ downloader.bar.Add64(dataPart.Size)
return os.Rename(tmpFilePath, filePath)
- } else {
- err = os.Remove(tmpFilePath)
- if err != nil {
- return err
- }
+ }
+
+ if err = os.Remove(tmpFilePath); err != nil {
+ return err
}
}
// Scan all parts
- parts, err := readDirAllFilePart(filePath, fileName, urlData.Ext)
+ parts, err := readDirAllFilePart(filePath, fileName, dataPart.Ext)
if err != nil {
return err
}
@@ -259,11 +279,11 @@ func MultiThreadSave(
}
lastEnd = part.End
}
- if lastEnd != urlData.Size-1 {
+ if lastEnd != dataPart.Size-1 {
newPart := &FilePartMeta{
Index: parts[len(parts)-1].Index + 1,
Start: lastEnd + 1,
- End: urlData.Size - 1,
+ End: dataPart.Size - 1,
Cur: lastEnd + 1,
}
parts = append(parts, newPart)
@@ -272,14 +292,14 @@ func MultiThreadSave(
} else {
var start, end, partSize int64
var i float32
- partSize = urlData.Size / int64(threadNum)
+ partSize = dataPart.Size / int64(downloader.option.ThreadNumber)
i = 0
- for start < urlData.Size {
+ for start < dataPart.Size {
end = start + partSize - 1
- if end > urlData.Size {
- end = urlData.Size - 1
- } else if int(i+1) == threadNum && end < urlData.Size {
- end = urlData.Size - 1
+ if end > dataPart.Size {
+ end = dataPart.Size - 1
+ } else if int(i+1) == downloader.option.ThreadNumber && end < dataPart.Size {
+ end = dataPart.Size - 1
}
part := &FilePartMeta{
Index: i,
@@ -294,13 +314,13 @@ func MultiThreadSave(
}
}
if savedSize > 0 {
- bar.Add64(savedSize)
- if savedSize == urlData.Size {
+ downloader.bar.Add64(savedSize)
+ if savedSize == dataPart.Size {
return mergeMultiPart(filePath, parts)
}
}
- wgp := utils.NewWaitGroupPool(threadNum)
+ wgp := utils.NewWaitGroupPool(downloader.option.ThreadNumber)
var errs []error
for _, part := range unfinishedPart {
wgp.Add()
@@ -311,17 +331,16 @@ func MultiThreadSave(
return
}
defer func() {
- file.Close()
+ file.Close() // nolint
wgp.Done()
}()
+
var end, chunkSize int64
- headers := map[string]string{
- "Referer": refer,
- }
- if chunkSizeMB <= 0 {
+ headers := make(map[string]string, 1)
+ if downloader.option.ChunkSizeMB <= 0 {
chunkSize = part.End - part.Start + 1
} else {
- chunkSize = int64(chunkSizeMB) * 1024 * 1024
+ chunkSize = int64(downloader.option.ChunkSizeMB) * 1024 * 1024
}
end = computeEnd(part.Cur, chunkSize, part.End)
remainingSize := part.End - part.Cur + 1
@@ -338,11 +357,11 @@ func MultiThreadSave(
headers["Range"] = fmt.Sprintf("bytes=%d-%d", part.Cur, end)
temp := part.Cur
for i := 0; ; i++ {
- written, err := writeFile(urlData.URL, file, headers, bar)
+ written, err := downloader.writeFile(dataPart.URL, file, headers)
if err == nil {
remainingSize -= chunkSize
break
- } else if i+1 >= config.RetryTimes {
+ } else if i+1 >= downloader.option.RetryTimes {
errs = append(errs, err)
return
}
@@ -379,7 +398,7 @@ func readDirAllFilePart(filePath, filename, extname string) ([]*FilePartMeta, er
if err != nil {
return nil, err
}
- defer dir.Close()
+ defer dir.Close() // nolint
fns, err := dir.Readdir(0)
if err != nil {
return nil, err
@@ -408,14 +427,14 @@ func parseFilePartMeta(filepath string, fileSize int64) (*FilePartMeta, error) {
if err != nil {
return nil, err
}
- defer file.Close()
+ defer file.Close() // nolint
var buf [512]byte
readSize, err := file.ReadAt(buf[0:size], 0)
if err != nil && err != io.EOF {
return nil, err
}
if readSize < size {
- return nil, fmt.Errorf("The file has been broked, please delete all part files and re-download.\n")
+ return nil, fmt.Errorf("the file has been broked, please delete all part files and re-download")
}
err = binary.Read(bytes.NewBuffer(buf[:size]), binary.LittleEndian, meta)
if err != nil {
@@ -439,8 +458,8 @@ func mergeMultiPart(filepath string, parts []*FilePartMeta) error {
var partFiles []*os.File
defer func() {
for _, f := range partFiles {
- f.Close()
- os.Remove(f.Name())
+ f.Close() // nolint
+ os.Remove(f.Name()) // nolint
}
}()
for _, part := range parts {
@@ -458,88 +477,89 @@ func mergeMultiPart(filepath string, parts []*FilePartMeta) error {
return err
}
}
- tempFile.Close()
+ tempFile.Close() // nolint
err = os.Rename(tempFilePath, filepath)
return err
}
+func (downloader *Downloader) aria2(title string, stream *types.Stream) error {
+ rpcData := Aria2RPCData{
+ JSONRPC: "2.0",
+ ID: "annie", // can be modified
+ Method: "aria2.addUri",
+ }
+ rpcData.Params[0] = "token:" + downloader.option.Aria2Token
+ var urls []string
+ for _, p := range stream.Parts {
+ urls = append(urls, p.URL)
+ }
+ var inputs Aria2Input
+ inputs.Header = append(inputs.Header, "Referer: "+downloader.option.Refer)
+ for i := range urls {
+ rpcData.Params[1] = urls[i : i+1]
+ inputs.Out = fmt.Sprintf("%s[%d].%s", title, i, stream.Parts[0].Ext)
+ rpcData.Params[2] = &inputs
+ jsonData, err := json.Marshal(rpcData)
+ if err != nil {
+ return err
+ }
+ reqURL := fmt.Sprintf("%s://%s/jsonrpc", downloader.option.Aria2Method, downloader.option.Aria2Addr)
+ req, err := http.NewRequest(http.MethodPost, reqURL, bytes.NewBuffer(jsonData))
+ if err != nil {
+ return err
+ }
+ req.Header.Set("Content-Type", "application/json")
+
+ var client = http.Client{Timeout: 30 * time.Second}
+ res, err := client.Do(req)
+ if err != nil {
+ return err
+ }
+ // The http Client and Transport guarantee that Body is always
+ // non-nil, even on responses without a body or responses with
+ // a zero-length body.
+ res.Body.Close() // nolint
+ }
+ return nil
+}
+
// Download download urls
-func Download(v Data, refer string, chunkSizeMB int) error {
- v.genSortedStreams()
- var (
- title string
- stream string
- )
- if config.OutputName == "" {
- title = utils.FileName(v.Title, "")
- } else {
- title = utils.FileName(config.OutputName, "")
+func (downloader *Downloader) Download(data *types.Data) error {
+ sortedStreams := genSortedStreams(data.Streams)
+ if downloader.option.InfoOnly {
+ printInfo(data, sortedStreams)
+ return nil
}
- if config.Stream == "" {
- stream = v.sortedStreams[0].name
- } else {
- stream = config.Stream
+
+ title := downloader.option.OutputName
+ if title == "" {
+ title = data.Title
}
- data, ok := v.Streams[stream]
+ title = utils.FileName(title, "", downloader.option.FileNameLength)
+
+ streamName := downloader.option.Stream
+ if streamName == "" {
+ streamName = sortedStreams[0].ID
+ }
+ stream, ok := data.Streams[streamName]
if !ok {
- return fmt.Errorf("no stream named %s", stream)
+ return fmt.Errorf("no stream named %s", streamName)
}
- v.printInfo(stream) // if InfoOnly, this func will print all streams info
- if config.InfoOnly {
- return nil
+
+ printStreamInfo(data, stream)
+
+ // download caption
+ if downloader.option.Caption && data.Caption != nil {
+ downloader.caption(data.Caption.URL, title, data.Caption.Ext) // nolint
}
- // Use aria2 rpc to download
- if config.UseAria2RPC {
- rpcData := Aria2RPCData{
- JSONRPC: "2.0",
- ID: "annie", // can be modified
- Method: "aria2.addUri",
- }
- rpcData.Params[0] = "token:" + config.Aria2Token
- var urls []string
- for _, p := range data.URLs {
- urls = append(urls, p.URL)
- }
- var inputs Aria2Input
- inputs.Header = append(inputs.Header, "Referer: "+refer)
- for i := range urls {
- rpcData.Params[1] = urls[i : i+1]
- inputs.Out = fmt.Sprintf("%s[%d].%s", title, i, data.URLs[0].Ext)
- rpcData.Params[2] = &inputs
- jsonData, err := json.Marshal(rpcData)
- if err != nil {
- return err
- }
- reqURL := fmt.Sprintf("%s://%s/jsonrpc", config.Aria2Method, config.Aria2Addr)
- req, err := http.NewRequest(http.MethodPost, reqURL, bytes.NewBuffer(jsonData))
- if err != nil {
- return err
- }
- req.Header.Set("Content-Type", "application/json")
- var client = http.Client{Timeout: 30 * time.Second}
- res, err := client.Do(req)
- if err != nil {
- return err
- }
- // The http Client and Transport guarantee that Body is always
- // non-nil, even on responses without a body or responses with
- // a zero-length body.
- res.Body.Close()
- }
- return nil
+ // Use aria2 rpc to download
+ if downloader.option.UseAria2RPC {
+ return downloader.aria2(title, stream)
}
// Skip the complete file that has been merged
- var (
- mergedFilePath string
- err error
- )
- if v.Site == "YouTube youtube.com" {
- mergedFilePath, err = utils.FilePath(title, data.URLs[0].Ext, false)
- } else {
- mergedFilePath, err = utils.FilePath(title, "mp4", false)
- }
+ mergedFilePath, err := utils.FilePath(title, stream.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false)
if err != nil {
return err
}
@@ -552,67 +572,66 @@ func Download(v Data, refer string, chunkSizeMB int) error {
fmt.Printf("%s: file already exists, skipping\n", mergedFilePath)
return nil
}
- bar := progressBar(data.Size)
- bar.Start()
- if len(data.URLs) == 1 {
+
+ downloader.bar = progressBar(stream.Size)
+ downloader.bar.Start()
+ if len(stream.Parts) == 1 {
// only one fragment
var err error
- if config.MultiThread {
- err = MultiThreadSave(data.URLs[0], refer, title, bar, chunkSizeMB, config.ThreadNumber)
+ if downloader.option.MultiThread {
+ err = downloader.multiThreadSave(stream.Parts[0], title)
} else {
- err = Save(data.URLs[0], refer, title, bar, chunkSizeMB)
+ err = downloader.save(stream.Parts[0], title)
}
if err != nil {
return err
}
- bar.Finish()
+ downloader.bar.Finish()
return nil
}
- wgp := utils.NewWaitGroupPool(config.ThreadNumber)
+
+ wgp := utils.NewWaitGroupPool(downloader.option.ThreadNumber)
// multiple fragments
errs := make([]error, 0)
lock := sync.Mutex{}
- parts := make([]string, len(data.URLs))
- for index, url := range data.URLs {
+ parts := make([]string, len(stream.Parts))
+ for index, part := range stream.Parts {
if len(errs) > 0 {
break
}
partFileName := fmt.Sprintf("%s[%d]", title, index)
- partFilePath, err := utils.FilePath(partFileName, url.Ext, false)
+ partFilePath, err := utils.FilePath(partFileName, part.Ext, downloader.option.FileNameLength, downloader.option.OutputPath, false)
if err != nil {
return err
}
parts[index] = partFilePath
wgp.Add()
- go func(url URL, refer, fileName string, bar *pb.ProgressBar) {
+ go func(part *types.Part, fileName string) {
defer wgp.Done()
- err := Save(url, refer, fileName, bar, chunkSizeMB)
+ err := downloader.save(part, fileName)
if err != nil {
lock.Lock()
errs = append(errs, err)
lock.Unlock()
}
- }(url, refer, partFileName, bar)
+ }(part, partFileName)
}
wgp.Wait()
if len(errs) > 0 {
return errs[0]
}
- bar.Finish()
+ downloader.bar.Finish()
- if v.Type != "video" {
+ if data.Type != types.DataTypeVideo {
return nil
}
- // merge
+
fmt.Printf("Merging video parts into %s\n", mergedFilePath)
- if v.Site == "YouTube youtube.com" {
- err = utils.MergeAudioAndVideo(parts, mergedFilePath)
- } else {
- err = utils.MergeToMP4(parts, mergedFilePath, title)
+ if stream.Ext == "mp4" {
+ return utils.MergeToMP4(parts, mergedFilePath, title)
}
-
- return err
+ return utils.MergeFilesWithSameExtension(parts, mergedFilePath)
}
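
One point worth spelling out: when Options.ChunkSizeMB > 0, save splits a part into fixed Range windows, retries each window up to RetryTimes, and resumes from the size of an existing .download file. A standalone sketch of that Range arithmetic (sizes illustrative):

package main

import "fmt"

// Mirrors the chunking in save(): a part of `total` bytes is fetched in
// ChunkSizeMB-sized windows; a partial .download file shifts the start.
func main() {
    var (
        total     int64 = 5 * 1024 * 1024 // part.Size
        chunkSize int64 = 2 * 1024 * 1024 // ChunkSizeMB = 2
        start     int64                   // tempFileSize when resuming
    )
    for start < total {
        end := start + chunkSize - 1
        if end >= total {
            end = total - 1
        }
        fmt.Printf("Range: bytes=%d-%d\n", start, end)
        start = end + 1
    }
    // Range: bytes=0-2097151
    // Range: bytes=2097152-4194303
    // Range: bytes=4194304-5242879
}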
diff --git a/downloader/downloader_test.go b/downloader/downloader_test.go
index 6a9a32260..c13f89e7f 100644
--- a/downloader/downloader_test.go
+++ b/downloader/downloader_test.go
@@ -1,31 +1,26 @@
package downloader
import (
- // "os"
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
)
-func init() {
- config.RetryTimes = 100
- config.ThreadNumber = 1
-}
-
func TestDownload(t *testing.T) {
testCases := []struct {
name string
- data Data
+ data *types.Data
}{
{
name: "normal test",
- data: Data{
+ data: &types.Data{
Site: "douyin",
Title: "test",
- Type: "video",
- Streams: map[string]Stream{
+ Type: types.DataTypeVideo,
+ Streams: map[string]*types.Stream{
"default": {
- URLs: []URL{
+ ID: "default",
+ Parts: []*types.Part{
{
URL: "https://aweme.snssdk.com/aweme/v1/playwm/?video_id=v0200f9a0000bc117isuatl67cees890&line=0",
Size: 4927877,
@@ -38,13 +33,14 @@ func TestDownload(t *testing.T) {
},
{
name: "multi-stream test",
- data: Data{
+ data: &types.Data{
Site: "douyin",
Title: "test2",
- Type: "video",
- Streams: map[string]Stream{
+ Type: types.DataTypeVideo,
+ Streams: map[string]*types.Stream{
"miaopai": {
- URLs: []URL{
+ ID: "miaopai",
+ Parts: []*types.Part{
{
URL: "https://txycdn.miaopai.com/stream/KwR26jUGh2ySnVjYbQiFmomNjP14LtMU3vi6sQ__.mp4?ssig=6594aa01a78e78f50c65c164d186ba9e&time_stamp=1537070910786",
Size: 4011590,
@@ -54,7 +50,8 @@ func TestDownload(t *testing.T) {
Size: 4011590,
},
"douyin": {
- URLs: []URL{
+ ID: "douyin",
+ Parts: []*types.Part{
{
URL: "https://aweme.snssdk.com/aweme/v1/playwm/?video_id=v0200f9a0000bc117isuatl67cees890&line=0",
Size: 4927877,
@@ -68,13 +65,14 @@ func TestDownload(t *testing.T) {
},
{
name: "image test",
- data: Data{
+ data: &types.Data{
Site: "bcy",
Title: "bcy image test",
- Type: "image",
- Streams: map[string]Stream{
+ Type: types.DataTypeImage,
+ Streams: map[string]*types.Stream{
"default": {
- URLs: []URL{
+ ID: "default",
+ Parts: []*types.Part{
{
URL: "http://img5.bcyimg.com/coser/143767/post/c0j7x/0d713eb41a614053ac6a3b146914f6bc.jpg/w650",
Size: 56107,
@@ -92,7 +90,7 @@ func TestDownload(t *testing.T) {
},
}
for _, testCase := range testCases {
- err := Download(testCase.data, "", 10)
+ err := New(Options{}).Download(testCase.data)
if err != nil {
t.Error(err)
}
diff --git a/downloader/types.go b/downloader/types.go
index 7ba7c880c..b0f0c9f18 100644
--- a/downloader/types.go
+++ b/downloader/types.go
@@ -1,124 +1,6 @@
package downloader
-import (
- "fmt"
- "sort"
-
- "github.com/fatih/color"
-
- "github.com/iawia002/annie/config"
-)
-
-// URL data struct for single URL information
-type URL struct {
- URL string `json:"url"`
- Size int64 `json:"size"`
- Ext string `json:"ext"`
-}
-
-// Stream data struct for each stream
-type Stream struct {
- // [URL: {URL, Size, Ext}, ...]
- // Some video files have multiple fragments
- // and support for downloading multiple image files at once
- URLs []URL `json:"urls"`
- Quality string `json:"quality"`
- // total size of all urls
- Size int64 `json:"size"`
-
- // name used in sortedStreams
- name string
-}
-
-// Data data struct for video information
-type Data struct {
- Site string `json:"site"`
- Title string `json:"title"`
- Type string `json:"type"`
- // each stream has it's own URLs and Quality
- Streams map[string]Stream `json:"streams"`
- sortedStreams []Stream
-
- // Err is used to record whether an error occurred when extracting data.
- // It is used to record the error information corresponding to each url when extracting the list data.
- // NOTE(iawia002): err is only used in Data list
- Err error `json:"-"`
- // URL is used to record the address of this download
- URL string `json:"url"`
-}
-
-// EmptyData returns an "empty" Data object with the given URL and error
-func EmptyData(url string, err error) Data {
- return Data{
- URL: url,
- Err: err,
- }
-}
-
-func (data *Stream) calculateTotalSize() {
- var size int64
- for _, urlData := range data.URLs {
- size += urlData.Size
- }
- data.Size = size
-}
-
-func (data Stream) printStream() {
- blue := color.New(color.FgBlue)
- cyan := color.New(color.FgCyan)
- blue.Println(fmt.Sprintf(" [%s] -------------------", data.name))
- if data.Quality != "" {
- cyan.Printf(" Quality: ")
- fmt.Println(data.Quality)
- }
- cyan.Printf(" Size: ")
- if data.Size == 0 {
- data.calculateTotalSize()
- }
- fmt.Printf("%.2f MiB (%d Bytes)\n", float64(data.Size)/(1024*1024), data.Size)
- cyan.Printf(" # download with: ")
- fmt.Printf("annie -f %s ...\n\n", data.name)
-}
-
-func (v *Data) genSortedStreams() {
- for k, data := range v.Streams {
- if data.Size == 0 {
- data.calculateTotalSize()
- }
- data.name = k
- v.Streams[k] = data
- v.sortedStreams = append(v.sortedStreams, data)
- }
- if len(v.Streams) > 1 {
- sort.Slice(
- v.sortedStreams, func(i, j int) bool { return v.sortedStreams[i].Size > v.sortedStreams[j].Size },
- )
- }
-}
-
-func (v *Data) printInfo(stream string) {
- cyan := color.New(color.FgCyan)
- fmt.Println()
- cyan.Printf(" Site: ")
- fmt.Println(v.Site)
- cyan.Printf(" Title: ")
- fmt.Println(v.Title)
- cyan.Printf(" Type: ")
- fmt.Println(v.Type)
- if config.InfoOnly {
- cyan.Printf(" Streams: ")
- fmt.Println("# All available quality")
- for _, data := range v.sortedStreams {
- data.printStream()
- }
- } else {
- cyan.Printf(" Stream: ")
- fmt.Println()
- v.Streams[stream].printStream()
- }
-}
-
-// Aria2RPCData json RPC 2.0 for Aria2
+// Aria2RPCData defines the data structure of a JSON-RPC 2.0 request for Aria2.
type Aria2RPCData struct {
// More info about RPC interface please refer to
// https://aria2.github.io/manual/en/html/aria2c.html#rpc-interface
@@ -130,7 +12,7 @@ type Aria2RPCData struct {
Params [3]interface{} `json:"params"`
}
-// Aria2Input options for `aria2.addUri`
+// Aria2Input defines the options for `aria2.addUri`
// https://aria2.github.io/manual/en/html/aria2c.html#id3
type Aria2Input struct {
// The file name of the downloaded file
@@ -139,6 +21,7 @@ type Aria2Input struct {
Header []string `json:"header"`
}
+// FilePartMeta defines the meta information of a file part.
type FilePartMeta struct {
Index float32
Start int64
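
To make the wire format concrete, here is a sketch of the request body that the new aria2() method assembles from these two structs. The structs are copied locally because the json tags of the fields outside this excerpt (jsonrpc, id, method, out) are an assumption here:

package main

import (
    "encoding/json"
    "fmt"
)

type aria2RPCData struct {
    JSONRPC string         `json:"jsonrpc"`
    ID      string         `json:"id"`
    Method  string         `json:"method"`
    Params  [3]interface{} `json:"params"`
}

type aria2Input struct {
    Out    string   `json:"out"`
    Header []string `json:"header"`
}

func main() {
    payload := aria2RPCData{JSONRPC: "2.0", ID: "annie", Method: "aria2.addUri"}
    payload.Params[0] = "token:secret"
    payload.Params[1] = []string{"https://example.com/part0.mp4"}
    payload.Params[2] = aria2Input{
        Out:    "title[0].mp4",
        Header: []string{"Referer: https://example.com"},
    }
    b, _ := json.Marshal(payload)
    fmt.Println(string(b))
    // {"jsonrpc":"2.0","id":"annie","method":"aria2.addUri","params":
    //  ["token:secret",["https://example.com/part0.mp4"],
    //   {"out":"title[0].mp4","header":["Referer: https://example.com"]}]}
}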
diff --git a/downloader/utils.go b/downloader/utils.go
new file mode 100644
index 000000000..fba9ea58c
--- /dev/null
+++ b/downloader/utils.go
@@ -0,0 +1,68 @@
+package downloader
+
+import (
+ "fmt"
+ "sort"
+
+ "github.com/fatih/color"
+
+ "github.com/iawia002/annie/extractors/types"
+)
+
+var (
+ blue = color.New(color.FgBlue)
+ cyan = color.New(color.FgCyan)
+)
+
+func genSortedStreams(streams map[string]*types.Stream) []*types.Stream {
+ sortedStreams := make([]*types.Stream, 0, len(streams))
+ for _, data := range streams {
+ sortedStreams = append(sortedStreams, data)
+ }
+ if len(sortedStreams) > 1 {
+ sort.Slice(
+ sortedStreams, func(i, j int) bool { return sortedStreams[i].Size > sortedStreams[j].Size },
+ )
+ }
+ return sortedStreams
+}
+
+func printHeader(data *types.Data) {
+ fmt.Println()
+ cyan.Printf(" Site: ") // nolint
+ fmt.Println(data.Site)
+ cyan.Printf(" Title: ") // nolint
+ fmt.Println(data.Title)
+ cyan.Printf(" Type: ") // nolint
+ fmt.Println(data.Type)
+}
+
+func printStream(stream *types.Stream) {
+ blue.Println(fmt.Sprintf(" [%s] -------------------", stream.ID)) // nolint
+ if stream.Quality != "" {
+ cyan.Printf(" Quality: ") // nolint
+ fmt.Println(stream.Quality)
+ }
+ cyan.Printf(" Size: ") // nolint
+ fmt.Printf("%.2f MiB (%d Bytes)\n", float64(stream.Size)/(1024*1024), stream.Size)
+ cyan.Printf(" # download with: ") // nolint
+ fmt.Printf("annie -f %s ...\n\n", stream.ID)
+}
+
+func printInfo(data *types.Data, sortedStreams []*types.Stream) {
+ printHeader(data)
+
+ cyan.Printf(" Streams: ") // nolint
+ fmt.Println("# All available quality")
+ for _, stream := range sortedStreams {
+ printStream(stream)
+ }
+}
+
+func printStreamInfo(data *types.Data, stream *types.Stream) {
+ printHeader(data)
+
+ cyan.Printf(" Stream: ") // nolint
+ fmt.Println()
+ printStream(stream)
+}
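
genSortedStreams orders streams by total size, largest first, which is what makes an empty Stream option in Download pick the best quality (sortedStreams[0].ID). A tiny sketch of that selection rule, using bilibili's numeric quality codes as example IDs:

package main

import (
    "fmt"
    "sort"

    "github.com/iawia002/annie/extractors/types"
)

func main() {
    streams := []*types.Stream{
        {ID: "32", Size: 30 << 20},  // 清晰 480P
        {ID: "80", Size: 120 << 20}, // 高清 1080P
        {ID: "64", Size: 60 << 20},  // 高清 720P
    }
    // same comparator as genSortedStreams: bigger stream first
    sort.Slice(streams, func(i, j int) bool { return streams[i].Size > streams[j].Size })
    fmt.Println(streams[0].ID) // prints 80, the stream Download picks by default
}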
diff --git a/extractors/bcy/bcy.go b/extractors/bcy/bcy.go
index 8617998ce..a4958915c 100644
--- a/extractors/bcy/bcy.go
+++ b/extractors/bcy/bcy.go
@@ -5,8 +5,7 @@ import (
"fmt"
"strings"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
@@ -22,8 +21,15 @@ type bcyData struct {
} `json:"detail"`
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a bcy extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
@@ -33,7 +39,7 @@ func Extract(url string) ([]downloader.Data, error) {
rep := strings.NewReplacer(`\"`, `"`, `\\`, `\`)
realURLs := utils.MatchOneOf(html, `JSON.parse\("(.+?)"\);`)
if realURLs == nil || len(realURLs) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
jsonString := rep.Replace(realURLs[1])
@@ -48,7 +54,7 @@ func Extract(url string) ([]downloader.Data, error) {
}
title := strings.Replace(parser.Title(doc), " - 半次元 banciyuan - ACG爱好者社区", "", -1)
- urls := make([]downloader.URL, 0, len(data.Detail.PostData.Multi))
+ parts := make([]*types.Part, 0, len(data.Detail.PostData.Multi))
var totalSize int64
for _, img := range data.Detail.PostData.Multi {
size, err := request.Size(img.OriginalPath, url)
@@ -60,23 +66,23 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urls = append(urls, downloader.URL{
+ parts = append(parts, &types.Part{
URL: img.OriginalPath,
Size: size,
Ext: ext,
})
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: urls,
- Size: totalSize,
+ Parts: parts,
+ Size: totalSize,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "半次元 bcy.net",
Title: title,
- Type: "image",
+ Type: types.DataTypeImage,
Streams: streams,
URL: url,
},
diff --git a/extractors/bcy/bcy_test.go b/extractors/bcy/bcy_test.go
index b785da7d1..fd021d58c 100644
--- a/extractors/bcy/bcy_test.go
+++ b/extractors/bcy/bcy_test.go
@@ -3,13 +3,11 @@ package bcy
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 100
tests := []struct {
name string
args test.Args
@@ -25,7 +23,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/bilibili/bilibili.go b/extractors/bilibili/bilibili.go
index 05627bb33..231b3978a 100644
--- a/extractors/bilibili/bilibili.go
+++ b/extractors/bilibili/bilibili.go
@@ -7,9 +7,7 @@ import (
"strconv"
"strings"
- "github.com/iawia002/annie/config"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
@@ -32,13 +30,13 @@ const referer = "https://www.bilibili.com"
var utoken string
-func genAPI(aid, cid int, bangumi bool, quality string, seasonType string) (string, error) {
+func genAPI(aid, cid int, bangumi bool, quality, seasonType, cookie string) (string, error) {
var (
err error
baseAPIURL string
params string
)
- if config.Cookie != "" && utoken == "" {
+ if cookie != "" && utoken == "" {
utoken, err = request.Get(
fmt.Sprintf("%said=%d&cid=%d", bilibiliTokenAPI, aid, cid),
referer,
@@ -83,18 +81,18 @@ func genAPI(aid, cid int, bangumi bool, quality string, seasonType string) (stri
return api, nil
}
-func genURL(durl []dURLData) ([]downloader.URL, int64) {
+func genParts(durl []dURLData) ([]*types.Part, int64) {
var size int64
- urls := make([]downloader.URL, len(durl))
+ parts := make([]*types.Part, len(durl))
for index, data := range durl {
size += data.Size
- urls[index] = downloader.URL{
+ parts[index] = &types.Part{
URL: data.URL,
Size: data.Size,
Ext: "flv",
}
}
- return urls, size
+ return parts, size
}
type bilibiliOptions struct {
@@ -105,16 +103,17 @@ type bilibiliOptions struct {
cid int
page int
subtitle string
+ cookie string
}
-func extractBangumi(url, html string) ([]downloader.Data, error) {
+func extractBangumi(url, html string, extractOption types.Options) ([]*types.Data, error) {
dataString := utils.MatchOneOf(html, `window.__INITIAL_STATE__=(.+?);\(function`)[1]
var data bangumiData
err := json.Unmarshal([]byte(dataString), &data)
if err != nil {
return nil, err
}
- if !config.Playlist {
+ if !extractOption.Playlist {
options := bilibiliOptions{
url: url,
html: html,
@@ -122,13 +121,13 @@ func extractBangumi(url, html string) ([]downloader.Data, error) {
aid: data.EpInfo.Aid,
cid: data.EpInfo.Cid,
}
- return []downloader.Data{bilibiliDownload(options)}, nil
+ return []*types.Data{bilibiliDownload(options, extractOption)}, nil
}
// handle bangumi playlist
- needDownloadItems := utils.NeedDownloadList(len(data.EpList))
- extractedData := make([]downloader.Data, len(needDownloadItems))
- wgp := utils.NewWaitGroupPool(config.ThreadNumber)
+ needDownloadItems := utils.NeedDownloadList(extractOption.Items, extractOption.ItemStart, extractOption.ItemEnd, len(data.EpList))
+ extractedData := make([]*types.Data, len(needDownloadItems))
+ wgp := utils.NewWaitGroupPool(extractOption.ThreadNumber)
dataIndex := 0
for index, u := range data.EpList {
if !utils.ItemInSlice(index+1, needDownloadItems) {
@@ -146,9 +145,9 @@ func extractBangumi(url, html string) ([]downloader.Data, error) {
aid: u.Aid,
cid: u.Cid,
}
- go func(index int, options bilibiliOptions, extractedData []downloader.Data) {
+ go func(index int, options bilibiliOptions, extractedData []*types.Data) {
defer wgp.Done()
- extractedData[index] = bilibiliDownload(options)
+ extractedData[index] = bilibiliDownload(options, extractOption)
}(dataIndex, options, extractedData)
dataIndex++
}
@@ -171,12 +170,12 @@ func getMultiPageData(html string) (*multiPage, error) {
return &data, nil
}
-func extractNormalVideo(url, html string) ([]downloader.Data, error) {
+func extractNormalVideo(url, html string, extractOption types.Options) ([]*types.Data, error) {
pageData, err := getMultiPageData(html)
if err != nil {
return nil, err
}
- if !config.Playlist {
+ if !extractOption.Playlist {
// handle URL that has a playlist, mainly for unified titles
// <h1> tag does not include subtitles
// bangumi doesn't need this
@@ -191,7 +190,7 @@ func extractNormalVideo(url, html string) ([]downloader.Data, error) {
}
if len(pageData.VideoData.Pages) < p || p < 1 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
page := pageData.VideoData.Pages[p-1]
@@ -208,14 +207,14 @@ func extractNormalVideo(url, html string) ([]downloader.Data, error) {
} else {
options.subtitle = page.Part
}
- return []downloader.Data{bilibiliDownload(options)}, nil
+ return []*types.Data{bilibiliDownload(options, extractOption)}, nil
}
// handle normal video playlist
// https://www.bilibili.com/video/av20827366/?p=1
- needDownloadItems := utils.NeedDownloadList(len(pageData.VideoData.Pages))
- extractedData := make([]downloader.Data, len(needDownloadItems))
- wgp := utils.NewWaitGroupPool(config.ThreadNumber)
+ needDownloadItems := utils.NeedDownloadList(extractOption.Items, extractOption.ItemStart, extractOption.ItemEnd, len(pageData.VideoData.Pages))
+ extractedData := make([]*types.Data, len(needDownloadItems))
+ wgp := utils.NewWaitGroupPool(extractOption.ThreadNumber)
dataIndex := 0
for index, u := range pageData.VideoData.Pages {
if !utils.ItemInSlice(index+1, needDownloadItems) {
@@ -230,9 +229,9 @@ func extractNormalVideo(url, html string) ([]downloader.Data, error) {
subtitle: u.Part,
page: u.Page,
}
- go func(index int, options bilibiliOptions, extractedData []downloader.Data) {
+ go func(index int, options bilibiliOptions, extractedData []*types.Data) {
defer wgp.Done()
- extractedData[index] = bilibiliDownload(options)
+ extractedData[index] = bilibiliDownload(options, extractOption)
}(dataIndex, options, extractedData)
dataIndex++
}
@@ -240,8 +239,15 @@ func extractNormalVideo(url, html string) ([]downloader.Data, error) {
return extractedData, nil
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a bilibili extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
var err error
html, err := request.Get(url, referer, nil)
if err != nil {
@@ -249,14 +255,14 @@ func Extract(url string) ([]downloader.Data, error) {
}
if strings.Contains(url, "bangumi") {
// handle bangumi
- return extractBangumi(url, html)
+ return extractBangumi(url, html, option)
}
// handle normal video
- return extractNormalVideo(url, html)
+ return extractNormalVideo(url, html, option)
}
// bilibiliDownload is the download function for a single URL
-func bilibiliDownload(options bilibiliOptions) downloader.Data {
+func bilibiliDownload(options bilibiliOptions, extractOption types.Options) *types.Data {
var (
err error
html string
@@ -268,7 +274,7 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data {
} else {
html, err = request.Get(options.url, referer, nil)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
}
if options.bangumi {
@@ -278,34 +284,34 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data {
// Get "accept_quality" and "accept_description"
// "accept_description":["高清 1080P","高清 720P","清晰 480P","流畅 360P"],
// "accept_quality":[80,48,32,16],
- api, err := genAPI(options.aid, options.cid, options.bangumi, "15", seasonType)
+ api, err := genAPI(options.aid, options.cid, options.bangumi, "15", seasonType, options.cookie)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
jsonString, err := request.Get(api, referer, nil)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
var quality qualityInfo
err = json.Unmarshal([]byte(jsonString), &quality)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
- streams := make(map[string]downloader.Stream, len(quality.Quality))
+ streams := make(map[string]*types.Stream, len(quality.Quality))
for _, q := range quality.Quality {
- apiURL, err := genAPI(options.aid, options.cid, options.bangumi, strconv.Itoa(q), seasonType)
+ apiURL, err := genAPI(options.aid, options.cid, options.bangumi, strconv.Itoa(q), seasonType, options.cookie)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
jsonString, err := request.Get(apiURL, referer, nil)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
var data bilibiliData
err = json.Unmarshal([]byte(jsonString), &data)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
// Avoid duplicate streams
@@ -313,9 +319,9 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data {
continue
}
- urls, size := genURL(data.DURL)
- streams[strconv.Itoa(data.Quality)] = downloader.Stream{
- URLs: urls,
+ parts, size := genParts(data.DURL)
+ streams[strconv.Itoa(data.Quality)] = &types.Stream{
+ Parts: parts,
Size: size,
Quality: qualityString[data.Quality],
}
@@ -324,30 +330,26 @@ func bilibiliDownload(options bilibiliOptions) downloader.Data {
// get the title
doc, err := parser.GetDoc(html)
if err != nil {
- return downloader.EmptyData(options.url, err)
+ return types.EmptyData(options.url, err)
}
title := parser.Title(doc)
if options.subtitle != "" {
- if config.EpisodeTitleOnly {
+ if extractOption.EpisodeTitleOnly {
title = fmt.Sprintf("P%d %s", options.page, options.subtitle)
} else {
title = fmt.Sprintf("%s P%d %s", title, options.page, options.subtitle)
}
}
- err = downloader.Caption(
- fmt.Sprintf("https://comment.bilibili.com/%d.xml", options.cid),
- options.url, title, "xml",
- )
- if err != nil {
- return downloader.EmptyData(options.url, err)
- }
-
- return downloader.Data{
+ return &types.Data{
Site: "哔哩哔哩 bilibili.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
- URL: options.url,
+ Caption: &types.Part{
+ URL: fmt.Sprintf("https://comment.bilibili.com/%d.xml", options.cid),
+ Ext: "xml",
+ },
+ URL: options.url,
}
}
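
The tail of bilibiliDownload also changes shape: the extractor no longer fetches the danmaku itself, it only describes the caption as a types.Part, and Downloader.caption fetches that when Options.Caption is set. A sketch of the resulting data (helper name hypothetical):

package main

import (
    "fmt"

    "github.com/iawia002/annie/extractors/types"
)

// danmakuData is a hypothetical helper mirroring what bilibiliDownload
// now returns: the caption is data describing a URL, not a side effect.
func danmakuData(cid int, title string) *types.Data {
    return &types.Data{
        Site:  "哔哩哔哩 bilibili.com",
        Title: title,
        Type:  types.DataTypeVideo,
        Caption: &types.Part{
            URL: fmt.Sprintf("https://comment.bilibili.com/%d.xml", cid),
            Ext: "xml",
        },
    }
}

func main() {
    fmt.Println(danmakuData(1176840, "demo").Caption.URL)
}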
diff --git a/extractors/bilibili/bilibili_test.go b/extractors/bilibili/bilibili_test.go
index a1b87fd05..6e6b90633 100644
--- a/extractors/bilibili/bilibili_test.go
+++ b/extractors/bilibili/bilibili_test.go
@@ -3,14 +3,11 @@ package bilibili
import (
"testing"
- "github.com/iawia002/annie/config"
- "github.com/iawia002/annie/downloader"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestBilibili(t *testing.T) {
- config.InfoOnly = true
- config.ThreadNumber = 9 // travis out of memory issue
tests := []struct {
name string
args test.Args
@@ -72,18 +69,18 @@ func TestBilibili(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var (
- data []downloader.Data
+ data []*types.Data
err error
)
-
if tt.playlist {
// for playlist, we don't check the data
- config.Playlist = true
- _, err = Extract(tt.args.URL)
+ _, err = New().Extract(tt.args.URL, types.Options{
+ Playlist: true,
+ ThreadNumber: 9,
+ })
test.CheckError(t, err)
} else {
- config.Playlist = false
- data, err = Extract(tt.args.URL)
+ data, err = New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
}
diff --git a/extractors/defs.go b/extractors/defs.go
deleted file mode 100644
index 99777b8fc..000000000
--- a/extractors/defs.go
+++ /dev/null
@@ -1,8 +0,0 @@
-package extractors
-
-import (
- "errors"
-)
-
-var ErrURLParseFailed = errors.New("url parse failed")
-var ErrLoginRequired = errors.New("login required")
diff --git a/extractors/douyin/douyin.go b/extractors/douyin/douyin.go
index b92db9f51..911ee60d7 100644
--- a/extractors/douyin/douyin.go
+++ b/extractors/douyin/douyin.go
@@ -1,29 +1,39 @@
package douyin
import (
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "encoding/json"
+ "errors"
+ "fmt"
+
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type data struct {
+ ItemList []struct {
+ Desc string `json:"desc"`
+ } `json:"item_list"`
+}
+
+type extractor struct{}
+
+// New returns a douyin extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
var err error
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
}
- var title string
- desc := utils.MatchOneOf(html, `<p class="desc">(.+?)</p>`)
- if desc != nil {
- title = desc[1]
- } else {
- title = "抖音短视频"
- }
+
realURLs := utils.MatchOneOf(html, `playAddr: "(.+?)"`)
if realURLs == nil || len(realURLs) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
realURL := realURLs[1]
@@ -31,22 +41,48 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "mp4",
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: []downloader.URL{urlData},
- Size: size,
+ Parts: []*types.Part{urlData},
+ Size: size,
},
}
- return []downloader.Data{
+
+ videoIDs := utils.MatchOneOf(url, `/video/(\d+)`)
+ if len(videoIDs) == 0 {
+ return nil, errors.New("unable to get video ID")
+ }
+ videoID := videoIDs[1]
+
+ dytks := utils.MatchOneOf(html, `dytk: "(.+?)"`)
+ if len(dytks) == 0 {
+ return nil, errors.New("unable to get dytk info")
+ }
+ dytk := dytks[1]
+
+ apiDataString, err := request.Get(
+ fmt.Sprintf("https://www.douyin.com/web/api/v2/aweme/iteminfo/?item_ids=%s&dytk=%s", videoID, dytk),
+ url, nil,
+ )
+ if err != nil {
+ return nil, err
+ }
+
+ var apiData data
+ if err = json.Unmarshal([]byte(apiDataString), &apiData); err != nil {
+ return nil, err
+ }
+
+ return []*types.Data{
{
Site: "抖音 douyin.com",
- Title: title,
- Type: "video",
+ Title: apiData.ItemList[0].Desc,
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/douyin/douyin_test.go b/extractors/douyin/douyin_test.go
index ca10121c3..e6f09690b 100644
--- a/extractors/douyin/douyin_test.go
+++ b/extractors/douyin/douyin_test.go
@@ -3,13 +3,11 @@ package douyin
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 100
tests := []struct {
name string
args test.Args
@@ -24,7 +22,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/douyu/douyu.go b/extractors/douyu/douyu.go
index 4f512fc13..2052d8eea 100644
--- a/extractors/douyu/douyu.go
+++ b/extractors/douyu/douyu.go
@@ -4,8 +4,7 @@ import (
"encoding/json"
"errors"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -48,8 +47,15 @@ func douyuM3u8(url string) ([]douyuURLInfo, int64, error) {
return data, totalSize, nil
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a douyu extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
var err error
liveVid := utils.MatchOneOf(url, `https?://www.douyu.com/(\S+)`)
if liveVid != nil {
@@ -62,13 +68,13 @@ func Extract(url string) ([]downloader.Data, error) {
}
titles := utils.MatchOneOf(html, `<title>(.*?)</title>`)
if titles == nil || len(titles) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
title := titles[1]
vids := utils.MatchOneOf(url, `https?://v.douyu.com/show/(\S+)`)
if vids == nil || len(vids) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
vid := vids[1]
@@ -76,33 +82,35 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- var dataDict douyuData
- json.Unmarshal([]byte(dataString), &dataDict)
+ dataDict := new(douyuData)
+ if err := json.Unmarshal([]byte(dataString), dataDict); err != nil {
+ return nil, err
+ }
m3u8URLs, totalSize, err := douyuM3u8(dataDict.Data.VideoURL)
if err != nil {
return nil, err
}
- urls := make([]downloader.URL, len(m3u8URLs))
+ urls := make([]*types.Part, len(m3u8URLs))
for index, u := range m3u8URLs {
- urls[index] = downloader.URL{
+ urls[index] = &types.Part{
URL: u.URL,
Size: u.Size,
Ext: "ts",
}
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: urls,
- Size: totalSize,
+ Parts: urls,
+ Size: totalSize,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "斗鱼 douyu.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/douyu/douyu_test.go b/extractors/douyu/douyu_test.go
index 49f65f73d..1d5190f96 100644
--- a/extractors/douyu/douyu_test.go
+++ b/extractors/douyu/douyu_test.go
@@ -3,13 +3,11 @@ package douyu
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -25,10 +23,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- // data, err := Extract(tt.args.URL)
- // test.CheckError(t, err)
- // test.Check(t, tt.args, data[0])
- Extract(tt.args.URL)
+ New().Extract(tt.args.URL, types.Options{})
})
}
}
diff --git a/extractors/extractors.go b/extractors/extractors.go
new file mode 100644
index 000000000..641bcda58
--- /dev/null
+++ b/extractors/extractors.go
@@ -0,0 +1,105 @@
+package extractors
+
+import (
+ "net/url"
+ "strings"
+
+ "github.com/iawia002/annie/extractors/bcy"
+ "github.com/iawia002/annie/extractors/bilibili"
+ "github.com/iawia002/annie/extractors/douyin"
+ "github.com/iawia002/annie/extractors/douyu"
+ "github.com/iawia002/annie/extractors/facebook"
+ "github.com/iawia002/annie/extractors/geekbang"
+ "github.com/iawia002/annie/extractors/instagram"
+ "github.com/iawia002/annie/extractors/iqiyi"
+ "github.com/iawia002/annie/extractors/mgtv"
+ "github.com/iawia002/annie/extractors/miaopai"
+ "github.com/iawia002/annie/extractors/netease"
+ "github.com/iawia002/annie/extractors/pixivision"
+ "github.com/iawia002/annie/extractors/pornhub"
+ "github.com/iawia002/annie/extractors/qq"
+ "github.com/iawia002/annie/extractors/tangdou"
+ "github.com/iawia002/annie/extractors/tiktok"
+ "github.com/iawia002/annie/extractors/tumblr"
+ "github.com/iawia002/annie/extractors/twitter"
+ "github.com/iawia002/annie/extractors/types"
+ "github.com/iawia002/annie/extractors/udn"
+ "github.com/iawia002/annie/extractors/universal"
+ "github.com/iawia002/annie/extractors/vimeo"
+ "github.com/iawia002/annie/extractors/weibo"
+ "github.com/iawia002/annie/extractors/xvideos"
+ "github.com/iawia002/annie/extractors/yinyuetai"
+ "github.com/iawia002/annie/extractors/youku"
+ "github.com/iawia002/annie/extractors/youtube"
+ "github.com/iawia002/annie/utils"
+)
+
+var extractorMap map[string]types.Extractor
+
+func init() {
+ douyinExtractor := douyin.New()
+ youtubeExtractor := youtube.New()
+
+ extractorMap = map[string]types.Extractor{
+ "": universal.New(), // universal extractor
+
+ "douyin": douyinExtractor,
+ "iesdouyin": douyinExtractor,
+ "bilibili": bilibili.New(),
+ "bcy": bcy.New(),
+ "pixivision": pixivision.New(),
+ "youku": youku.New(),
+ "youtube": youtubeExtractor,
+ "youtu": youtubeExtractor, // youtu.be
+ "iqiyi": iqiyi.New(),
+ "mgtv": mgtv.New(),
+ "tangdou": tangdou.New(),
+ "tumblr": tumblr.New(),
+ "vimeo": vimeo.New(),
+ "facebook": facebook.New(),
+ "douyu": douyu.New(),
+ "miaopai": miaopai.New(),
+ "163": netease.New(),
+ "weibo": weibo.New(),
+ "instagram": instagram.New(),
+ "twitter": twitter.New(),
+ "qq": qq.New(),
+ "yinyuetai": yinyuetai.New(),
+ "geekbang": geekbang.New(),
+ "pornhub": pornhub.New(),
+ "xvideos": xvideos.New(),
+ "udn": udn.New(),
+ "tiktok": tiktok.New(),
+ }
+}
+
+// Extract is the main function to extract the data.
+func Extract(u string, option types.Options) ([]*types.Data, error) {
+ u = strings.TrimSpace(u)
+ var domain string
+
+ bilibiliShortLink := utils.MatchOneOf(u, `^(av|ep)\d+`)
+ if len(bilibiliShortLink) > 1 {
+ bilibiliURL := map[string]string{
+ "av": "https://www.bilibili.com/video/",
+ "ep": "https://www.bilibili.com/bangumi/play/",
+ }
+ domain = "bilibili"
+ u = bilibiliURL[bilibiliShortLink[1]] + u
+ } else {
+ u, err := url.ParseRequestURI(u)
+ if err != nil {
+ return nil, err
+ }
+ domain = utils.Domain(u.Host)
+ }
+ extractor := extractorMap[domain]
+ videos, err := extractor.Extract(u, option)
+ if err != nil {
+ return nil, err
+ }
+ for _, v := range videos {
+ v.FillUpStreamsData()
+ }
+ return videos, nil
+}
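
With the registry in place, callers no longer import individual extractor packages; they go through this one entry point, which resolves the extractor from the URL's domain (or the av/ep short-link prefix) and fills in stream sizes via FillUpStreamsData before returning. A minimal sketch (URL illustrative):

package main

import (
    "fmt"
    "log"

    "github.com/iawia002/annie/extractors"
    "github.com/iawia002/annie/extractors/types"
)

func main() {
    data, err := extractors.Extract("https://www.bilibili.com/video/av98765", types.Options{})
    if err != nil {
        log.Fatal(err)
    }
    for _, d := range data {
        fmt.Println(d.Site, d.Title)
    }
}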
diff --git a/extractors/facebook/facebook.go b/extractors/facebook/facebook.go
index 737d2d7bc..2760145de 100644
--- a/extractors/facebook/facebook.go
+++ b/extractors/facebook/facebook.go
@@ -3,14 +3,20 @@ package facebook
import (
"fmt"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a facebook extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
var err error
html, err := request.Get(url, url, nil)
if err != nil {
@@ -18,11 +24,11 @@ func Extract(url string) ([]downloader.Data, error) {
}
titles := utils.MatchOneOf(html, `<title id="pageTitle">(.+)</title>`)
if titles == nil || len(titles) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
title := titles[1]
- streams := map[string]downloader.Stream{}
+ streams := make(map[string]*types.Stream, 2)
for _, quality := range []string{"sd", "hd"} {
srcElement := utils.MatchOneOf(
html, fmt.Sprintf(`%s_src_no_ratelimit:"(.+?)"`, quality),
@@ -36,23 +42,23 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: u,
Size: size,
Ext: "mp4",
}
- streams[quality] = downloader.Stream{
- URLs: []downloader.URL{urlData},
+ streams[quality] = &types.Stream{
+ Parts: []*types.Part{urlData},
Size: size,
Quality: quality,
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "Facebook facebook.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/facebook/facebook_test.go b/extractors/facebook/facebook_test.go
index c1c3b4b5a..44ec5a03c 100644
--- a/extractors/facebook/facebook_test.go
+++ b/extractors/facebook/facebook_test.go
@@ -3,12 +3,11 @@ package facebook
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
tests := []struct {
name string
args test.Args
@@ -25,7 +24,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/geekbang/geekbang.go b/extractors/geekbang/geekbang.go
index 2a2f5f460..61602b3b8 100644
--- a/extractors/geekbang/geekbang.go
+++ b/extractors/geekbang/geekbang.go
@@ -3,11 +3,11 @@ package geekbang
import (
"encoding/json"
"errors"
+ "fmt"
"net/http"
"strings"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -71,22 +71,29 @@ func geekM3u8(url string) ([]geekURLInfo, error) {
return data, nil
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a geekbang extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, _ types.Options) ([]*types.Data, error) {
var err error
matches := utils.MatchOneOf(url, `https?://time.geekbang.org/course/detail/(\d+)-(\d+)`)
if matches == nil || len(matches) < 3 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
// Get video information
heanders := map[string]string{"Origin": "https://time.geekbang.org", "Content-Type": "application/json", "Referer": url}
- params := strings.NewReader("{\"id\":" + string(matches[2]) + "}")
+ params := strings.NewReader(fmt.Sprintf(`{"id": %q}`, matches[2]))
res, err := request.Request(http.MethodPost, "https://time.geekbang.org/serv/v1/article", params, heanders)
if err != nil {
return nil, err
}
- defer res.Body.Close()
+ defer res.Body.Close() // nolint
var data geekData
if err = json.NewDecoder(res.Body).Decode(&data); err != nil {
@@ -107,7 +114,7 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- defer res.Body.Close()
+ defer res.Body.Close() // nolint
var playAuth videoPlayAuth
if err = json.NewDecoder(res.Body).Decode(&playAuth); err != nil {
@@ -124,7 +131,7 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- defer res.Body.Close()
+ defer res.Body.Close() // nolint
var playInfo playInfo
if err = json.NewDecoder(res.Body).Decode(&playInfo); err != nil {
@@ -133,7 +140,7 @@ func Extract(url string) ([]downloader.Data, error) {
title := data.Data.Title
- streams := make(map[string]downloader.Stream, len(playInfo.PlayInfoList.PlayInfo))
+ streams := make(map[string]*types.Stream, len(playInfo.PlayInfoList.PlayInfo))
for _, media := range playInfo.PlayInfoList.PlayInfo {
m3u8URLs, err := geekM3u8(media.URL)
@@ -142,27 +149,26 @@ func Extract(url string) ([]downloader.Data, error) {
return nil, err
}
- urls := make([]downloader.URL, len(m3u8URLs))
+ urls := make([]*types.Part, len(m3u8URLs))
for index, u := range m3u8URLs {
- urls[index] = downloader.URL{
+ urls[index] = &types.Part{
URL: u.URL,
Size: u.Size,
Ext: "ts",
}
}
- streams[media.Definition] = downloader.Stream{
- URLs: urls,
- Size: media.Size,
- Quality: media.Definition,
+ streams[media.Definition] = &types.Stream{
+ Parts: urls,
+ Size: media.Size,
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "极客时间 geekbang.org",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
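
One detail worth calling out above: the request body is now built with `fmt.Sprintf` and the `%q` verb instead of string concatenation, so the matched id is quoted and escaped rather than spliced in raw (note it is therefore sent as a JSON string, not a number). A standalone illustration, with the payload shape taken from the hunk and everything else scaffolding:

```go
package main

import "fmt"

func main() {
	id := `123"}` // a hostile or malformed match
	// Plain concatenation injects the raw text into the body.
	unsafe := `{"id":` + id + `}`
	// %q quotes and escapes it, keeping the body well-formed JSON.
	safe := fmt.Sprintf(`{"id": %q}`, id)
	fmt.Println(unsafe) // {"id":123"}}
	fmt.Println(safe)   // {"id": "123\"}"}
}
```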
diff --git a/extractors/geekbang/geekbang_test.go b/extractors/geekbang/geekbang_test.go
index fef1c1558..cc5dd01f2 100644
--- a/extractors/geekbang/geekbang_test.go
+++ b/extractors/geekbang/geekbang_test.go
@@ -3,13 +3,11 @@ package geekbang
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -25,7 +23,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/instagram/instagram.go b/extractors/instagram/instagram.go
index a8e46c396..a0ea67572 100644
--- a/extractors/instagram/instagram.go
+++ b/extractors/instagram/instagram.go
@@ -3,8 +3,7 @@ package instagram
import (
"encoding/json"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
@@ -30,8 +29,15 @@ type instagram struct {
} `json:"entry_data"`
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns an instagram extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
@@ -45,29 +51,32 @@ func Extract(url string) ([]downloader.Data, error) {
dataStrings := utils.MatchOneOf(html, `window\._sharedData\s*=\s*(.*);`)
if dataStrings == nil || len(dataStrings) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
dataString := dataStrings[1]
var data instagram
if err = json.Unmarshal([]byte(dataString), &data); err != nil {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
- var realURL, dataType string
- var size int64
- streams := map[string]downloader.Stream{}
+ var (
+ realURL string
+ dataType types.DataType
+ size int64
+ )
+ streams := make(map[string]*types.Stream)
if data.EntryData.PostPage[0].Graphql.ShortcodeMedia.VideoURL != "" {
- // Data
- dataType = "video"
+ // Video
+ dataType = types.DataTypeVideo
realURL = data.EntryData.PostPage[0].Graphql.ShortcodeMedia.VideoURL
size, err = request.Size(realURL, url)
if err != nil {
return nil, err
}
- streams["default"] = downloader.Stream{
- URLs: []downloader.URL{
+ streams["default"] = &types.Stream{
+ Parts: []*types.Part{
{
URL: realURL,
Size: size,
@@ -78,7 +87,7 @@ func Extract(url string) ([]downloader.Data, error) {
}
} else {
// Image
- dataType = "image"
+ dataType = types.DataTypeImage
if data.EntryData.PostPage[0].Graphql.ShortcodeMedia.EdgeSidecar.Edges == nil {
// Single
realURL = data.EntryData.PostPage[0].Graphql.ShortcodeMedia.DisplayURL
@@ -86,8 +95,8 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- streams["default"] = downloader.Stream{
- URLs: []downloader.URL{
+ streams["default"] = &types.Stream{
+ Parts: []*types.Part{
{
URL: realURL,
Size: size,
@@ -99,14 +108,14 @@ func Extract(url string) ([]downloader.Data, error) {
} else {
// Album
var totalSize int64
- var urls []downloader.URL
+ var urls []*types.Part
for _, u := range data.EntryData.PostPage[0].Graphql.ShortcodeMedia.EdgeSidecar.Edges {
realURL = u.Node.DisplayURL
size, err = request.Size(realURL, url)
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "jpg",
@@ -114,14 +123,14 @@ func Extract(url string) ([]downloader.Data, error) {
urls = append(urls, urlData)
totalSize += size
}
- streams["default"] = downloader.Stream{
- URLs: urls,
- Size: totalSize,
+ streams["default"] = &types.Stream{
+ Parts: urls,
+ Size: totalSize,
}
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "Instagram instagram.com",
Title: title,
diff --git a/extractors/instagram/instagram_test.go b/extractors/instagram/instagram_test.go
index 1ce4ac662..1d508a35a 100644
--- a/extractors/instagram/instagram_test.go
+++ b/extractors/instagram/instagram_test.go
@@ -3,12 +3,11 @@ package instagram
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
tests := []struct {
name string
args test.Args
@@ -40,7 +39,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/iqiyi/iqiyi.go b/extractors/iqiyi/iqiyi.go
index 7a3ddd6e5..ca83f3bb7 100644
--- a/extractors/iqiyi/iqiyi.go
+++ b/extractors/iqiyi/iqiyi.go
@@ -8,8 +8,7 @@ import (
"strings"
"time"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
@@ -74,7 +73,7 @@ func getVF(params string) string {
return utils.Md5(params)
}
-func getVPS(tvid, vid string) (iqiyi, error) {
+func getVPS(tvid, vid string) (*iqiyi, error) {
t := time.Now().Unix() * 1000
host := "http://cache.video.qiyi.com"
params := fmt.Sprintf(
@@ -85,15 +84,24 @@ func getVPS(tvid, vid string) (iqiyi, error) {
apiURL := fmt.Sprintf("%s%s&vf=%s", host, params, vf)
info, err := request.Get(apiURL, iqiyiReferer, nil)
if err != nil {
- return iqiyi{}, err
+ return nil, err
+ }
+ data := new(iqiyi)
+ if err := json.Unmarshal([]byte(info), data); err != nil {
+ return nil, err
}
- var data iqiyi
- json.Unmarshal([]byte(info), &data)
return data, nil
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns an iqiyi extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, _ types.Options) ([]*types.Data, error) {
html, err := request.Get(url, iqiyiReferer, nil)
if err != nil {
return nil, err
@@ -112,7 +120,7 @@ func Extract(url string) ([]downloader.Data, error) {
)
}
if tvid == nil || len(tvid) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
vid := utils.MatchOneOf(
@@ -129,7 +137,7 @@ func Extract(url string) ([]downloader.Data, error) {
)
}
if vid == nil || len(vid) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
doc, err := parser.GetDoc(html)
@@ -155,10 +163,11 @@ func Extract(url string) ([]downloader.Data, error) {
if videoDatas.Code != "A00000" {
return nil, fmt.Errorf("can't play this video: %s", videoDatas.Msg)
}
- streams := map[string]downloader.Stream{}
+
+ streams := make(map[string]*types.Stream)
urlPrefix := videoDatas.Data.VP.Du
for _, video := range videoDatas.Data.VP.Tkl[0].Vs {
- urls := make([]downloader.URL, len(video.Fs))
+ urls := make([]*types.Part, len(video.Fs))
for index, v := range video.Fs {
realURLData, err := request.Get(urlPrefix+v.L, iqiyiReferer, nil)
if err != nil {
@@ -172,24 +181,24 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urls[index] = downloader.URL{
+ urls[index] = &types.Part{
URL: realURL.L,
Size: v.B,
Ext: ext,
}
}
- streams[strconv.Itoa(video.Bid)] = downloader.Stream{
- URLs: urls,
+ streams[strconv.Itoa(video.Bid)] = &types.Stream{
+ Parts: urls,
Size: video.Vsize,
Quality: video.Scrsz,
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "爱奇艺 iqiyi.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
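
The `getVPS` change earlier in this file is the same cleanup applied in several places: the `json.Unmarshal` error was previously discarded, and the function now returns a pointer so a failure yields `nil` plus an explicit error instead of a silent zero value. The pattern in isolation, with a hypothetical `payload` type standing in for `iqiyi`:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// payload stands in for the API response struct (iqiyi in the diff).
type payload struct {
	Code string `json:"code"`
}

// decode surfaces the unmarshal error and returns nil on failure.
func decode(raw string) (*payload, error) {
	data := new(payload)
	if err := json.Unmarshal([]byte(raw), data); err != nil {
		return nil, err
	}
	return data, nil
}

func main() {
	p, err := decode(`{"code":"A00000"}`)
	fmt.Println(p.Code, err) // A00000 <nil>
	p, err = decode(`not json`)
	fmt.Println(p, err) // <nil> plus a descriptive error
}
```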
diff --git a/extractors/iqiyi/iqiyi_test.go b/extractors/iqiyi/iqiyi_test.go
index d2a5bcc1a..41b1573ab 100644
--- a/extractors/iqiyi/iqiyi_test.go
+++ b/extractors/iqiyi/iqiyi_test.go
@@ -3,13 +3,11 @@ package iqiyi
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -44,7 +42,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/mgtv/mgtv.go b/extractors/mgtv/mgtv.go
index 8f2051a4c..698458a30 100644
--- a/extractors/mgtv/mgtv.go
+++ b/extractors/mgtv/mgtv.go
@@ -9,8 +9,7 @@ import (
"strings"
"time"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -95,8 +94,15 @@ func encodeTk2(str string) string {
return encodeString
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a mgtv extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
@@ -110,7 +116,7 @@ func Extract(url string) ([]downloader.Data, error) {
vid = utils.MatchOneOf(html, `vid: (\d+),`)
}
if vid == nil || len(vid) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
// API extract from https://js.mgtv.com/imgotv-miniv6/global/page/play-tv.js
@@ -160,7 +166,7 @@ func Extract(url string) ([]downloader.Data, error) {
)
mgtvStreams := mgtvData.Data.Stream
var addr mgtvVideoAddr
- streams := map[string]downloader.Stream{}
+ streams := make(map[string]*types.Stream)
for _, stream := range mgtvStreams {
if stream.URL == "" {
continue
@@ -179,26 +185,26 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urls := make([]downloader.URL, len(m3u8URLs))
+ urls := make([]*types.Part, len(m3u8URLs))
for index, u := range m3u8URLs {
- urls[index] = downloader.URL{
+ urls[index] = &types.Part{
URL: u.URL,
Size: u.Size,
Ext: "ts",
}
}
- streams[stream.Def] = downloader.Stream{
- URLs: urls,
+ streams[stream.Def] = &types.Stream{
+ Parts: urls,
Size: totalSize,
Quality: stream.Name,
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "芒果TV mgtv.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/mgtv/mgtv_test.go b/extractors/mgtv/mgtv_test.go
index c41eae5d9..566a81c40 100644
--- a/extractors/mgtv/mgtv_test.go
+++ b/extractors/mgtv/mgtv_test.go
@@ -3,13 +3,11 @@ package mgtv
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 20
tests := []struct {
name string
args test.Args
@@ -44,10 +42,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- Extract(tt.args.URL)
- // data, err := Extract(tt.args.URL)
- // test.CheckError(t, err)
- // test.Check(t, tt.args, data[0])
+ New().Extract(tt.args.URL, types.Options{})
})
}
}
diff --git a/extractors/miaopai/miaopai.go b/extractors/miaopai/miaopai.go
index 58efd6e34..8dfffb940 100644
--- a/extractors/miaopai/miaopai.go
+++ b/extractors/miaopai/miaopai.go
@@ -8,8 +8,7 @@ import (
"strings"
"time"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -39,11 +38,18 @@ func getRandomString(l int) string {
return strings.Join(s, "")
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a miaopai extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
ids := utils.MatchOneOf(url, `/media/([^\./]+)`, `/show(?:/channel)?/([^\./]+)`)
if ids == nil || len(ids) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
id := ids[1]
@@ -73,23 +79,23 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "mp4",
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: []downloader.URL{urlData},
- Size: size,
+ Parts: []*types.Part{urlData},
+ Size: size,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "秒拍 miaopai.com",
Title: data.Data.Description,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/miaopai/miaopai_test.go b/extractors/miaopai/miaopai_test.go
index b1163d384..841381d55 100644
--- a/extractors/miaopai/miaopai_test.go
+++ b/extractors/miaopai/miaopai_test.go
@@ -3,13 +3,11 @@ package miaopai
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -25,7 +23,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/netease/netease.go b/extractors/netease/netease.go
index ea78b7210..384ed73be 100644
--- a/extractors/netease/netease.go
+++ b/extractors/netease/netease.go
@@ -5,14 +5,20 @@ import (
netURL "net/url"
"strings"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a netease extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
url = strings.Replace(url, "/#/", "/", 1)
vid := utils.MatchOneOf(url, `/(mv|video)\?id=(\w+)`)
if vid == nil {
@@ -29,13 +35,13 @@ func Extract(url string) ([]downloader.Data, error) {
titles := utils.MatchOneOf(html, `<meta property="og:title" content="(.+?)" />`)
if titles == nil || len(titles) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
title := titles[1]
realURLs := utils.MatchOneOf(html, `<source src="(.+?)"`)
if realURLs == nil || len(realURLs) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
realURL, _ := netURL.QueryUnescape(realURLs[1])
@@ -43,22 +49,22 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "mp4",
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: []downloader.URL{urlData},
- Size: size,
+ Parts: []*types.Part{urlData},
+ Size: size,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "网易云音乐 music.163.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/netease/netease_test.go b/extractors/netease/netease_test.go
index 5f3914e71..c897bc43f 100644
--- a/extractors/netease/netease_test.go
+++ b/extractors/netease/netease_test.go
@@ -3,12 +3,11 @@ package netease
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
tests := []struct {
name string
args test.Args
@@ -32,7 +31,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/pixivision/pixivision.go b/extractors/pixivision/pixivision.go
index 846152648..b9d451a3d 100644
--- a/extractors/pixivision/pixivision.go
+++ b/extractors/pixivision/pixivision.go
@@ -1,33 +1,58 @@
package pixivision
import (
- "github.com/iawia002/annie/downloader"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
+ "github.com/iawia002/annie/utils"
)
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a pixivision extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
}
- title, urls, err := parser.GetImages(url, html, "am__work__illust ", nil)
+ title, urls, err := parser.GetImages(html, "am__work__illust ", nil)
if err != nil {
return nil, err
}
- streams := map[string]downloader.Stream{
+
+ parts := make([]*types.Part, 0, len(urls))
+ for _, u := range urls {
+ _, ext, err := utils.GetNameAndExt(u)
+ if err != nil {
+ return nil, err
+ }
+ size, err := request.Size(u, url)
+ if err != nil {
+ return nil, err
+ }
+ parts = append(parts, &types.Part{
+ URL: u,
+ Size: size,
+ Ext: ext,
+ })
+ }
+
+ streams := map[string]*types.Stream{
"default": {
- URLs: urls,
- Size: 0,
+ Parts: parts,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "pixivision pixivision.net",
Title: title,
- Type: "image",
+ Type: types.DataTypeImage,
Streams: streams,
URL: url,
},
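
pixivision previously built its default stream with `Size: 0`; the new loop resolves each part's extension and size up front. `utils.GetNameAndExt` is not shown in this diff; a rough approximation of what it is assumed to do (the real helper may normalize differently):

```go
package main

import (
	"fmt"
	"net/url"
	"path"
	"strings"
)

// nameAndExt splits the last path segment of a media URL into a base
// name and an extension, approximating utils.GetNameAndExt.
func nameAndExt(rawURL string) (string, string, error) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return "", "", err
	}
	base := path.Base(u.Path)
	ext := path.Ext(base)
	return strings.TrimSuffix(base, ext), strings.TrimPrefix(ext, "."), nil
}

func main() {
	name, ext, err := nameAndExt("https://i.pximg.net/img-master/0001_p0.jpg")
	fmt.Println(name, ext, err) // 0001_p0 jpg <nil>
}
```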
diff --git a/extractors/pixivision/pixivision_test.go b/extractors/pixivision/pixivision_test.go
index f278503b7..0ead03047 100644
--- a/extractors/pixivision/pixivision_test.go
+++ b/extractors/pixivision/pixivision_test.go
@@ -3,13 +3,11 @@ package pixivision
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 100
tests := []struct {
name string
args test.Args
@@ -24,7 +22,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/pornhub/pornhub.go b/extractors/pornhub/pornhub.go
index 7db8e1b40..094c4d457 100644
--- a/extractors/pornhub/pornhub.go
+++ b/extractors/pornhub/pornhub.go
@@ -5,8 +5,7 @@ import (
"encoding/json"
"fmt"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -17,8 +16,15 @@ type pornhubData struct {
VideoURL string `json:"videoUrl"`
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a pornhub extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
html, err := request.Get(url, url, nil)
if err != nil {
return nil, err
@@ -26,7 +32,7 @@ func Extract(url string) ([]downloader.Data, error) {
var title string
desc := utils.MatchOneOf(html, `<span class="inlineFree">(.+?)</span>`)
- if desc != nil && len(desc) > 1 {
+ if len(desc) > 1 {
title = desc[1]
} else {
title = "pornhub video"
@@ -34,7 +40,7 @@ func Extract(url string) ([]downloader.Data, error) {
realURLs := utils.MatchOneOf(html, `"mediaDefinitions":(.+?),"isVertical"`)
if realURLs == nil || len(realURLs) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
var pornhubs []pornhubData
@@ -42,7 +48,7 @@ func Extract(url string) ([]downloader.Data, error) {
return nil, err
}
- streams := make(map[string]downloader.Stream, len(pornhubs))
+ streams := make(map[string]*types.Stream, len(pornhubs))
for _, data := range pornhubs {
if data.Format == "hls" {
continue
@@ -67,23 +73,23 @@ func Extract(url string) ([]downloader.Data, error) {
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "mp4",
}
- streams[quality] = downloader.Stream{
- URLs: []downloader.URL{urlData},
+ streams[quality] = &types.Stream{
+ Parts: []*types.Part{urlData},
Size: size,
Quality: fmt.Sprintf("%sP", quality),
}
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "Pornhub pornhub.com",
Title: title,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/pornhub/pornhub_test.go b/extractors/pornhub/pornhub_test.go
index 6adf5325c..1798c487f 100644
--- a/extractors/pornhub/pornhub_test.go
+++ b/extractors/pornhub/pornhub_test.go
@@ -3,13 +3,11 @@ package pornhub
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestPornhub(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -24,7 +22,7 @@ func TestPornhub(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- Extract(tt.args.URL)
+ New().Extract(tt.args.URL, types.Options{})
})
}
}
diff --git a/extractors/qq/qq.go b/extractors/qq/qq.go
index c8588dbcb..7cecef4c3 100644
--- a/extractors/qq/qq.go
+++ b/extractors/qq/qq.go
@@ -7,8 +7,7 @@ import (
"strconv"
"strings"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
@@ -49,8 +48,8 @@ type qqKeyInfo struct {
const qqPlayerVersion string = "3.2.19.333"
-func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream, error) {
- streams := map[string]downloader.Stream{}
+func genStreams(vid, cdn string, data qqVideoInfo) (map[string]*types.Stream, error) {
+ streams := make(map[string]*types.Stream)
var vkey string
// number of fragments
clips := data.Vl.Vi[0].Cl.Fc
@@ -81,7 +80,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream
fns = append(fns[:1], fns[2:]...)
}
- var urls []downloader.URL
+ var urls []*types.Part
var totalSize int64
var filename string
for part := 1; part < clips+1; part++ {
@@ -107,7 +106,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream
}
jsonStrings := utils.MatchOneOf(html, `QZOutputJson=(.+);$`)
if jsonStrings == nil || len(jsonStrings) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
jsonString := jsonStrings[1]
@@ -125,7 +124,7 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream
if err != nil {
return nil, err
}
- urlData := downloader.URL{
+ urlData := &types.Part{
URL: realURL,
Size: size,
Ext: "mp4",
@@ -133,8 +132,8 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream
urls = append(urls, urlData)
totalSize += size
}
- streams[fi.Name] = downloader.Stream{
- URLs: urls,
+ streams[fi.Name] = &types.Stream{
+ Parts: urls,
Size: totalSize,
Quality: fi.Cname,
}
@@ -142,11 +141,18 @@ func genStreams(vid, cdn string, data qqVideoInfo) (map[string]downloader.Stream
return streams, nil
}
-// Extract is the main function for extracting data
-func Extract(url string) ([]downloader.Data, error) {
+type extractor struct{}
+
+// New returns a qq extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
vids := utils.MatchOneOf(url, `vid=(\w+)`, `/(\w+)\.html`)
if vids == nil || len(vids) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
vid := vids[1]
@@ -160,7 +166,7 @@ func Extract(url string) ([]downloader.Data, error) {
u, `vid=(\w+)`, `vid:\s*["'](\w+)`, `vid\s*=\s*["']\s*(\w+)`,
)
if vids == nil || len(vids) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
vid = vids[1]
}
@@ -175,7 +181,7 @@ func Extract(url string) ([]downloader.Data, error) {
}
jsonStrings := utils.MatchOneOf(html, `QZOutputJson=(.+);$`)
if jsonStrings == nil || len(jsonStrings) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
jsonString := jsonStrings[1]
@@ -194,11 +200,11 @@ func Extract(url string) ([]downloader.Data, error) {
return nil, err
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "腾讯视频 v.qq.com",
Title: data.Vl.Vi[0].Ti,
- Type: "video",
+ Type: types.DataTypeVideo,
Streams: streams,
URL: url,
},
diff --git a/extractors/qq/qq_test.go b/extractors/qq/qq_test.go
index 139e8a712..b785049e8 100644
--- a/extractors/qq/qq_test.go
+++ b/extractors/qq/qq_test.go
@@ -3,13 +3,11 @@ package qq
import (
"testing"
- "github.com/iawia002/annie/config"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/test"
)
func TestDownload(t *testing.T) {
- config.InfoOnly = true
- config.RetryTimes = 10
tests := []struct {
name string
args test.Args
@@ -44,7 +42,7 @@ func TestDownload(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- data, err := Extract(tt.args.URL)
+ data, err := New().Extract(tt.args.URL, types.Options{})
test.CheckError(t, err)
test.Check(t, tt.args, data[0])
})
diff --git a/extractors/tangdou/tangdou.go b/extractors/tangdou/tangdou.go
index a2a71b7e6..5c9c084ea 100644
--- a/extractors/tangdou/tangdou.go
+++ b/extractors/tangdou/tangdou.go
@@ -1,35 +1,42 @@
package tangdou
import (
- "github.com/iawia002/annie/config"
- "github.com/iawia002/annie/downloader"
- "github.com/iawia002/annie/extractors"
+ "github.com/iawia002/annie/extractors/types"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)
const referer = "http://www.tangdou.com/html/playlist/view/4173"
-// Extract is the main function for extracting data
-func Extract(uri string) ([]downloader.Data, error) {
- if !config.Playlist {
- return []downloader.Data{tangdouDownload(uri)}, nil
+type extractor struct{}
+
+// New returns a tangdou extractor.
+func New() types.Extractor {
+ return &extractor{}
+}
+
+// Extract is the main function to extract the data.
+func (e *extractor) Extract(url string, option types.Options) ([]*types.Data, error) {
+ if !option.Playlist {
+ return []*types.Data{tangdouDownload(url)}, nil
}
- html, err := request.Get(uri, referer, nil)
+
+ html, err := request.Get(url, referer, nil)
if err != nil {
return nil, err
}
+
videoIDs := utils.MatchAll(html, ``)
- needDownloadItems := utils.NeedDownloadList(len(videoIDs))
- extractedData := make([]downloader.Data, len(needDownloadItems))
- wgp := utils.NewWaitGroupPool(config.ThreadNumber)
+ needDownloadItems := utils.NeedDownloadList(option.Items, option.ItemStart, option.ItemEnd, len(videoIDs))
+ extractedData := make([]*types.Data, len(needDownloadItems))
+ wgp := utils.NewWaitGroupPool(option.ThreadNumber)
dataIndex := 0
for index, videoID := range videoIDs {
if !utils.ItemInSlice(index+1, needDownloadItems) || len(videoID) < 2 {
continue
}
wgp.Add()
- go func(index int, videURI string, extractedData []downloader.Data) {
+ go func(index int, videURI string, extractedData []*types.Data) {
defer wgp.Done()
extractedData[index] = tangdouDownload(videURI)
}(dataIndex, videoID[1], extractedData)
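
The playlist path above fans out one goroutine per selected item and caps concurrency with a pool sized by `option.ThreadNumber`. `utils.NewWaitGroupPool` is not part of this diff; a minimal sketch of a pool with the semantics the code appears to rely on (Add blocks once the pool is full):

```go
package main

import (
	"fmt"
	"sync"
)

// waitGroupPool is a bounded WaitGroup: Add blocks while `size`
// goroutines are already in flight.
type waitGroupPool struct {
	sem chan struct{}
	wg  sync.WaitGroup
}

func newWaitGroupPool(size int) *waitGroupPool {
	return &waitGroupPool{sem: make(chan struct{}, size)}
}

func (p *waitGroupPool) Add()  { p.sem <- struct{}{}; p.wg.Add(1) }
func (p *waitGroupPool) Done() { <-p.sem; p.wg.Done() }
func (p *waitGroupPool) Wait() { p.wg.Wait() }

func main() {
	results := make([]string, 5)
	wgp := newWaitGroupPool(2) // at most two extractions at once
	for i := range results {
		wgp.Add()
		go func(i int) {
			defer wgp.Done()
			results[i] = fmt.Sprintf("item %d", i)
		}(i)
	}
	wgp.Wait()
	fmt.Println(results)
}
```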
@@ -40,17 +47,17 @@ func Extract(uri string) ([]downloader.Data, error) {
}
// tangdouDownload download function for single url
-func tangdouDownload(uri string) downloader.Data {
+func tangdouDownload(uri string) *types.Data {
html, err := request.Get(uri, referer, nil)
if err != nil {
- return downloader.EmptyData(uri, err)
+ return types.EmptyData(uri, err)
}
titles := utils.MatchOneOf(
html, `(.+?)
`, `(.+?)`,
)
if titles == nil || len(titles) < 2 {
- return downloader.EmptyData(uri, extractors.ErrURLParseFailed)
+ return types.EmptyData(uri, types.ErrURLParseFailed)
}
title := titles[1]
diff --git a/extractors/tumblr/tumblr.go b/extractors/tumblr/tumblr.go
--- a/extractors/tumblr/tumblr.go
+++ b/extractors/tumblr/tumblr.go
@@ -63,37 +70,37 @@
html, `<script type="application/ld\+json">\s*(.+?)</script>`,
)
if jsonStrings == nil || len(jsonStrings) < 2 {
- return nil, extractors.ErrURLParseFailed
+ return nil, types.ErrURLParseFailed
}
jsonString := jsonStrings[1]
var totalSize int64
- var urls []downloader.URL
+ urls := make([]*types.Part, 0, 1)
if strings.Contains(jsonString, `"image":{"@list"`) {
// there are two data structures in the same field(image)
var imageList tumblrImageList
@@ -79,28 +77,28 @@ func tumblrImageDownload(url, html, title string) ([]downloader.Data, error) {
totalSize = size
urls = append(urls, urlData)
}
- streams := map[string]downloader.Stream{
+ streams := map[string]*types.Stream{
"default": {
- URLs: urls,
- Size: totalSize,
+ Parts: urls,
+ Size: totalSize,
},
}
- return []downloader.Data{
+ return []*types.Data{
{
Site: "Tumblr tumblr.com",
Title: title,
- Type: "image",
+ Type: types.DataTypeImage,
Streams: streams,
URL: url,
},
}, nil
}
-func tumblrVideoDownload(url, html, title string) ([]downloader.Data, error) {
+func tumblrVideoDownload(url, html, title string) ([]*types.Data, error) {
videoURLs := utils.MatchOneOf(html, `