-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support download video and playlist for tang dou. (#511)
* Support downloading videos and playlists for Tangdou.
* Fix a bug with share URLs and add a test for share URLs.
- Loading branch information
Showing
4 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
package tangdou | ||
|
||
import (
	"errors"

	"github.com/iawia002/annie/config"
	"github.com/iawia002/annie/downloader"
	"github.com/iawia002/annie/request"
	"github.com/iawia002/annie/utils"
)
|
||
const referer = "http://www.tangdou.com/html/playlist/view/4173" | ||
|
||
// Extract is the main function for extracting data | ||
func Extract(uri string) ([]downloader.Data, error) { | ||
var err error | ||
if !config.Playlist { | ||
return []downloader.Data{tangdouDownload(uri)}, nil | ||
} | ||
html, err := request.Get(uri, referer, nil) | ||
if err != nil { | ||
return downloader.EmptyList, err | ||
} | ||
videoIDs := utils.MatchAll(html, `<a target="tdplayer" href="(.+?)" class="title">`) | ||
needDownloadItems := utils.NeedDownloadList(len(videoIDs)) | ||
extractedData := make([]downloader.Data, len(needDownloadItems)) | ||
wgp := utils.NewWaitGroupPool(config.ThreadNumber) | ||
dataIndex := 0 | ||
for index, videoID := range videoIDs { | ||
if !utils.ItemInSlice(index+1, needDownloadItems) { | ||
continue | ||
} | ||
wgp.Add() | ||
go func(index int, videURI string, extractedData []downloader.Data) { | ||
defer wgp.Done() | ||
extractedData[index] = tangdouDownload(videURI) | ||
}(dataIndex, videoID[1], extractedData) | ||
dataIndex++ | ||
} | ||
wgp.Wait() | ||
return extractedData, nil | ||
} | ||
|
||
// tangdouDownload download function for single url | ||
func tangdouDownload(uri string) downloader.Data { | ||
var err error | ||
html, err := request.Get(uri, referer, nil) | ||
if err != nil { | ||
return downloader.EmptyData(uri, err) | ||
} | ||
|
||
title := utils.MatchOneOf( | ||
html, `<div class="title">(.+?)</div>`, `<meta name="description" content="(.+?)"`, `<title>(.+?)</title>`, | ||
)[1] | ||
|
||
var realURL string | ||
videoURLs := utils.MatchOneOf( | ||
html, `video:'(.+?)'`, `video:"(.+?)"`, `<video.*src="(.+?)"`, | ||
) | ||
if videoURLs == nil { | ||
shareURL := utils.MatchOneOf( | ||
html, `<div class="video">\s*<script src="(.+?)"`, | ||
)[1] | ||
signedVideo, err := request.Get(shareURL, uri, nil) | ||
if err != nil { | ||
return downloader.EmptyData(uri, err) | ||
} | ||
realURL = utils.MatchOneOf( | ||
signedVideo, `src=\\"(.+?)\\"`, | ||
)[1] | ||
} else { | ||
realURL = videoURLs[1] | ||
} | ||
|
||
size, err := request.Size(realURL, uri) | ||
if err != nil { | ||
return downloader.EmptyData(uri, err) | ||
} | ||
|
||
streams := map[string]downloader.Stream{ | ||
"default": { | ||
URLs: []downloader.URL{ | ||
{ | ||
URL: realURL, | ||
Size: size, | ||
Ext: "mp4", | ||
}, | ||
}, | ||
Size: size, | ||
}, | ||
} | ||
|
||
return downloader.Data{ | ||
Site: "糖豆广场舞 tangdou.com", | ||
Title: title, | ||
Type: "video", | ||
Streams: streams, | ||
URL: uri, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package tangdou | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/iawia002/annie/config" | ||
"github.com/iawia002/annie/downloader" | ||
"github.com/iawia002/annie/test" | ||
) | ||
|
||
func TestTangDou(t *testing.T) { | ||
config.InfoOnly = true | ||
config.ThreadNumber = 9 | ||
tests := []struct { | ||
name string | ||
args test.Args | ||
playlist bool | ||
}{ | ||
{ | ||
name: "contains video URL test directly and can get title from body's div tag", | ||
args: test.Args{ | ||
URL: "http://www.tangdou.com/v95/dAOQNgMjwT2D5w2.html", | ||
Title: "杨丽萍广场舞《好日子天天过》喜庆双扇扇子舞", | ||
Size: 87611483, | ||
}, | ||
}, | ||
{ | ||
name: "need call share url first and get the signed video URL test and can get title from head's title tag", | ||
args: test.Args{ | ||
URL: "http://m.tangdou.com/v94/dAOMMYNjwT1T2Q2.html", | ||
Title: "吉美广场舞《再唱山歌给党听》民族形体舞 附教学视频在线观看", | ||
Size: 50710318, | ||
}, | ||
}, | ||
{ | ||
name: "share url", | ||
args: test.Args{ | ||
URL: "https://share.tangdou.com/play.php?vid=1500667821669", | ||
Title: "井岗紫薇广场舞18步双人舞《采槟榔》附分解", | ||
Size: 26693149, | ||
}, | ||
}, | ||
{ | ||
name: "playlist test", | ||
args: test.Args{ | ||
URL: "http://www.tangdou.com/playlist/view/1882", | ||
Title: "青儿广场舞《小朋友们都被接走了》原创32步流行舞", | ||
Size: 69448816, | ||
}, | ||
playlist: true, | ||
}, | ||
{ | ||
name: "playlist test2", | ||
args: test.Args{ | ||
URL: "http://www.tangdou.com/playlist/view/2816/page/4", | ||
Title: "茉莉广场舞 我向草原问个好 原创藏族风民族舞附教学", | ||
Size: 66284484, | ||
}, | ||
playlist: true, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
var ( | ||
data []downloader.Data | ||
err error | ||
) | ||
if tt.playlist { | ||
// playlist mode | ||
config.Playlist = true | ||
_, err = Extract(tt.args.URL) | ||
test.CheckError(t, err) | ||
} else { | ||
config.Playlist = false | ||
data, err = Extract(tt.args.URL) | ||
test.CheckError(t, err) | ||
test.Check(t, tt.args, data[0]) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters