Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support downloading videos and playlists from Tangdou. #511

Merged
merged 3 commits into from
Sep 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,7 @@ pixivision | <https://www.pixivision.net> | | ✓ | | |
YouTube | <https://www.youtube.com> | ✓ | | ✓ | |
爱奇艺 | <https://www.iqiyi.com> | ✓ | | | |
芒果TV | <https://www.mgtv.com> | ✓ | | | |
糖豆广场舞 | <http://www.tangdou.com> | ✓ | | ✓ | |
Tumblr | <https://www.tumblr.com> | ✓ | ✓ | | |
Vimeo | <https://vimeo.com> | ✓ | | | |
Facebook | <https://facebook.com> | ✓ | | | |
Expand Down
98 changes: 98 additions & 0 deletions extractors/tangdou/tangdou.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package tangdou

import (
	"errors"

	"github.com/iawia002/annie/config"
	"github.com/iawia002/annie/downloader"
	"github.com/iawia002/annie/request"
	"github.com/iawia002/annie/utils"
)

const referer = "http://www.tangdou.com/html/playlist/view/4173"

// Extract is the main function for extracting data.
//
// When config.Playlist is false, uri is handed straight to tangdouDownload
// and the single result is returned. Otherwise uri is fetched as a playlist
// page, every `tdplayer` title link on it is collected, and the selected
// entries are extracted concurrently through a pool created with
// config.ThreadNumber.
//
// An error is returned only when the playlist page itself cannot be fetched;
// per-video failures are reported inside the corresponding downloader.Data.
func Extract(uri string) ([]downloader.Data, error) {
	if !config.Playlist {
		return []downloader.Data{tangdouDownload(uri)}, nil
	}

	page, err := request.Get(uri, referer, nil)
	if err != nil {
		return downloader.EmptyList, err
	}

	links := utils.MatchAll(page, `<a target="tdplayer" href="(.+?)" class="title">`)
	// NOTE(review): presumably a list of 1-based playlist positions chosen by
	// the user's options; confirm against utils.NeedDownloadList.
	wanted := utils.NeedDownloadList(len(links))
	results := make([]downloader.Data, len(wanted))
	pool := utils.NewWaitGroupPool(config.ThreadNumber)

	// slot is the next free position in results; it advances only for
	// selected links, so each goroutine writes to its own element.
	slot := 0
	for pos, link := range links {
		if utils.ItemInSlice(pos+1, wanted) {
			pool.Add()
			// slot and the URL are passed as arguments so every goroutine
			// works on its own copies rather than the shared loop variables.
			go func(slot int, videoURI string) {
				defer pool.Done()
				results[slot] = tangdouDownload(videoURI)
			}(slot, link[1])
			slot++
		}
	}
	pool.Wait()
	return results, nil
}

// tangdouDownload extracts the stream information for a single video page.
//
// It fetches uri, reads the title from the page, and looks for the video URL
// directly in the page source. When the page does not contain one, it fetches
// the script referenced inside the `<div class="video">` element and takes
// the video URL from that response instead.
//
// Failures (network errors or a page that does not match any of the expected
// patterns) are reported through downloader.EmptyData(uri, err) rather than
// by panicking.
func tangdouDownload(uri string) downloader.Data {
	html, err := request.Get(uri, referer, nil)
	if err != nil {
		return downloader.EmptyData(uri, err)
	}

	// MatchOneOf yields nil when none of the patterns match, so every result
	// is length-checked before the capture group at index 1 is read.
	titles := utils.MatchOneOf(
		html, `<div class="title">(.+?)</div>`, `<meta name="description" content="(.+?)"`, `<title>(.+?)</title>`,
	)
	if len(titles) < 2 {
		return downloader.EmptyData(uri, errors.New("tangdou: video title not found"))
	}
	title := titles[1]

	var realURL string
	videoURLs := utils.MatchOneOf(
		html, `video:'(.+?)'`, `video:"(.+?)"`, `<video.*src="(.+?)"`,
	)
	if len(videoURLs) >= 2 {
		realURL = videoURLs[1]
	} else {
		// No video URL in the page itself: follow the embedded script, using
		// the page as referer, and read the video URL from its response.
		shareURLs := utils.MatchOneOf(
			html, `<div class="video">\s*<script src="(.+?)"`,
		)
		if len(shareURLs) < 2 {
			return downloader.EmptyData(uri, errors.New("tangdou: video URL not found"))
		}
		signedVideo, err := request.Get(shareURLs[1], uri, nil)
		if err != nil {
			return downloader.EmptyData(uri, err)
		}
		signedURLs := utils.MatchOneOf(
			signedVideo, `src=\\"(.+?)\\"`,
		)
		if len(signedURLs) < 2 {
			return downloader.EmptyData(uri, errors.New("tangdou: signed video URL not found"))
		}
		realURL = signedURLs[1]
	}

	size, err := request.Size(realURL, uri)
	if err != nil {
		return downloader.EmptyData(uri, err)
	}

	streams := map[string]downloader.Stream{
		"default": {
			URLs: []downloader.URL{
				{
					URL:  realURL,
					Size: size,
					Ext:  "mp4",
				},
			},
			Size: size,
		},
	}

	return downloader.Data{
		Site:    "糖豆广场舞 tangdou.com",
		Title:   title,
		Type:    "video",
		Streams: streams,
		URL:     uri,
	}
}
81 changes: 81 additions & 0 deletions extractors/tangdou/tangdou_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package tangdou

import (
"testing"

"github.com/iawia002/annie/config"
"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/test"
)

// TestTangDou runs Extract against a set of tangdou.com URLs.
//
// Single-video cases compare the first extracted item with the expected
// title and size; playlist cases only verify that Extract returns no error.
// The test sets config.InfoOnly, config.ThreadNumber and config.Playlist.
func TestTangDou(t *testing.T) {
	config.InfoOnly = true
	config.ThreadNumber = 9

	type testCase struct {
		name     string
		args     test.Args
		playlist bool
	}
	cases := []testCase{
		{
			name: "contains video URL test directly and can get title from body's div tag",
			args: test.Args{
				URL:   "http://www.tangdou.com/v95/dAOQNgMjwT2D5w2.html",
				Title: "杨丽萍广场舞《好日子天天过》喜庆双扇扇子舞",
				Size:  87611483,
			},
		},
		{
			name: "need call share url first and get the signed video URL test and can get title from head's title tag",
			args: test.Args{
				URL:   "http://m.tangdou.com/v94/dAOMMYNjwT1T2Q2.html",
				Title: "吉美广场舞《再唱山歌给党听》民族形体舞 附教学视频在线观看",
				Size:  50710318,
			},
		},
		{
			name: "share url",
			args: test.Args{
				URL:   "https://share.tangdou.com/play.php?vid=1500667821669",
				Title: "井岗紫薇广场舞18步双人舞《采槟榔》附分解",
				Size:  26693149,
			},
		},
		{
			name: "playlist test",
			args: test.Args{
				URL:   "http://www.tangdou.com/playlist/view/1882",
				Title: "青儿广场舞《小朋友们都被接走了》原创32步流行舞",
				Size:  69448816,
			},
			playlist: true,
		},
		{
			name: "playlist test2",
			args: test.Args{
				URL:   "http://www.tangdou.com/playlist/view/2816/page/4",
				Title: "茉莉广场舞 我向草原问个好 原创藏族风民族舞附教学",
				Size:  66284484,
			},
			playlist: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var (
				extracted []downloader.Data
				err       error
			)
			// The case decides whether Extract runs in playlist mode.
			config.Playlist = tc.playlist
			extracted, err = Extract(tc.args.URL)
			test.CheckError(t, err)
			if tc.playlist {
				// Playlist mode: only the absence of an error is checked.
				return
			}
			test.Check(t, tc.args, extracted[0])
		})
	}
}
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/iawia002/annie/extractors/pixivision"
"github.com/iawia002/annie/extractors/pornhub"
"github.com/iawia002/annie/extractors/qq"
"github.com/iawia002/annie/extractors/tangdou"
"github.com/iawia002/annie/extractors/tumblr"
"github.com/iawia002/annie/extractors/twitter"
"github.com/iawia002/annie/extractors/universal"
Expand Down Expand Up @@ -129,6 +130,8 @@ func download(videoURL string) bool {
data, err = iqiyi.Extract(videoURL)
case "mgtv":
data, err = mgtv.Extract(videoURL)
case "tangdou":
data, err = tangdou.Extract(videoURL)
case "tumblr":
data, err = tumblr.Extract(videoURL)
case "vimeo":
Expand Down