Skip to content

Commit

Permalink
Support downloading videos and playlists from Tangdou. (#511)
Browse files Browse the repository at this point in the history
* support downloading videos and playlists from Tangdou.

* fix bug for share url and add one test for share url
  • Loading branch information
cxjava authored and iawia002 committed Sep 13, 2019
1 parent b9e08c4 commit 0283218
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,7 @@ pixivision | <https://www.pixivision.net> | | ✓ | | |
YouTube | <https://www.youtube.com> | ✓ | | ✓ | |
爱奇艺 | <https://www.iqiyi.com> | ✓ | | | |
芒果TV | <https://www.mgtv.com> | ✓ | | | |
糖豆广场舞 | <http://www.tangdou.com> | ✓ | | ✓ | |
Tumblr | <https://www.tumblr.com> | ✓ | ✓ | | |
Vimeo | <https://vimeo.com> | ✓ | | | |
Facebook | <https://facebook.com> | ✓ | | | |
Expand Down
98 changes: 98 additions & 0 deletions extractors/tangdou/tangdou.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package tangdou

import (
	"errors"

	"github.com/iawia002/annie/config"
	"github.com/iawia002/annie/downloader"
	"github.com/iawia002/annie/request"
	"github.com/iawia002/annie/utils"
)

// referer is passed as the second argument of request.Get when fetching
// playlist and video pages (presumably sent as the HTTP Referer header —
// confirm against the request package).
const referer = "http://www.tangdou.com/html/playlist/view/4173"

// Extract is the entry point of the tangdou extractor.
//
// When config.Playlist is false, uri is treated as a single video page and the
// returned slice holds exactly one element. Otherwise uri is fetched as a
// playlist page, every video link on it is collected, and the links selected
// by utils.NeedDownloadList are extracted concurrently, bounded by
// config.ThreadNumber. A non-nil error is returned only when the playlist page
// itself cannot be fetched.
func Extract(uri string) ([]downloader.Data, error) {
	if !config.Playlist {
		return []downloader.Data{tangdouDownload(uri)}, nil
	}

	page, err := request.Get(uri, referer, nil)
	if err != nil {
		return downloader.EmptyList, err
	}

	links := utils.MatchAll(page, `<a target="tdplayer" href="(.+?)" class="title">`)
	wanted := utils.NeedDownloadList(len(links))
	results := make([]downloader.Data, len(wanted))
	pool := utils.NewWaitGroupPool(config.ThreadNumber)

	// slot is the position in results for the next selected video; it only
	// advances for links that are actually downloaded.
	slot := 0
	for i := range links {
		// Playlist positions are 1-based.
		if utils.ItemInSlice(i+1, wanted) {
			pool.Add()
			go func(slot int, videoURI string) {
				defer pool.Done()
				results[slot] = tangdouDownload(videoURI)
			}(slot, links[i][1])
			slot++
		}
	}
	pool.Wait()

	return results, nil
}

// tangdouDownload extracts the download information for a single video page.
//
// It fetches uri, reads the title and the video URL out of the returned HTML,
// and queries the size of the video with request.Size. When the page does not
// contain the video URL directly, the script referenced inside the
// `<div class="video">` element is fetched and the URL is read from its
// response instead.
//
// Every failure — a request error, or a page that matches none of the expected
// patterns — is reported through downloader.EmptyData(uri, err) rather than by
// panicking, so one bad page cannot crash a playlist download.
func tangdouDownload(uri string) downloader.Data {
	html, err := request.Get(uri, referer, nil)
	if err != nil {
		return downloader.EmptyData(uri, err)
	}

	// utils.MatchOneOf may return nil when no pattern matches, so each result
	// is length-checked before capture group 1 is read.
	titles := utils.MatchOneOf(
		html, `<div class="title">(.+?)</div>`, `<meta name="description" content="(.+?)"`, `<title>(.+?)</title>`,
	)
	if len(titles) < 2 {
		return downloader.EmptyData(uri, errors.New("tangdou: video title not found"))
	}
	title := titles[1]

	var realURL string
	videoURLs := utils.MatchOneOf(
		html, `video:'(.+?)'`, `video:"(.+?)"`, `<video.*src="(.+?)"`,
	)
	if len(videoURLs) >= 2 {
		realURL = videoURLs[1]
	} else {
		// The page does not embed the video URL; look for the share script and
		// read the URL from what that script returns.
		shareURLs := utils.MatchOneOf(
			html, `<div class="video">\s*<script src="(.+?)"`,
		)
		if len(shareURLs) < 2 {
			return downloader.EmptyData(uri, errors.New("tangdou: video URL not found"))
		}
		signedVideo, err := request.Get(shareURLs[1], uri, nil)
		if err != nil {
			return downloader.EmptyData(uri, err)
		}
		signedURLs := utils.MatchOneOf(
			signedVideo, `src=\\"(.+?)\\"`,
		)
		if len(signedURLs) < 2 {
			return downloader.EmptyData(uri, errors.New("tangdou: signed video URL not found"))
		}
		realURL = signedURLs[1]
	}

	size, err := request.Size(realURL, uri)
	if err != nil {
		return downloader.EmptyData(uri, err)
	}

	streams := map[string]downloader.Stream{
		"default": {
			URLs: []downloader.URL{
				{
					URL:  realURL,
					Size: size,
					Ext:  "mp4",
				},
			},
			Size: size,
		},
	}

	return downloader.Data{
		Site:    "糖豆广场舞 tangdou.com",
		Title:   title,
		Type:    "video",
		Streams: streams,
		URL:     uri,
	}
}
81 changes: 81 additions & 0 deletions extractors/tangdou/tangdou_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package tangdou

import (
"testing"

"github.com/iawia002/annie/config"
"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/test"
)

// TestTangDou runs Extract against live tangdou.com pages. Single-video cases
// compare the first extracted item with the expected title and size; playlist
// cases only assert that extraction returns no error.
func TestTangDou(t *testing.T) {
	config.InfoOnly = true
	config.ThreadNumber = 9

	cases := []struct {
		name     string
		args     test.Args
		playlist bool
	}{
		{
			name: "contains video URL test directly and can get title from body's div tag",
			args: test.Args{
				URL:   "http://www.tangdou.com/v95/dAOQNgMjwT2D5w2.html",
				Title: "杨丽萍广场舞《好日子天天过》喜庆双扇扇子舞",
				Size:  87611483,
			},
		},
		{
			name: "need call share url first and get the signed video URL test and can get title from head's title tag",
			args: test.Args{
				URL:   "http://m.tangdou.com/v94/dAOMMYNjwT1T2Q2.html",
				Title: "吉美广场舞《再唱山歌给党听》民族形体舞 附教学视频在线观看",
				Size:  50710318,
			},
		},
		{
			name: "share url",
			args: test.Args{
				URL:   "https://share.tangdou.com/play.php?vid=1500667821669",
				Title: "井岗紫薇广场舞18步双人舞《采槟榔》附分解",
				Size:  26693149,
			},
		},
		{
			name: "playlist test",
			args: test.Args{
				URL:   "http://www.tangdou.com/playlist/view/1882",
				Title: "青儿广场舞《小朋友们都被接走了》原创32步流行舞",
				Size:  69448816,
			},
			playlist: true,
		},
		{
			name: "playlist test2",
			args: test.Args{
				URL:   "http://www.tangdou.com/playlist/view/2816/page/4",
				Title: "茉莉广场舞 我向草原问个好 原创藏族风民族舞附教学",
				Size:  66284484,
			},
			playlist: true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Extract reads the global playlist switch, so set it per case.
			config.Playlist = tc.playlist

			data, err := Extract(tc.args.URL)
			test.CheckError(t, err)
			if tc.playlist {
				// Playlist contents are not compared; only the error is checked.
				return
			}
			test.Check(t, tc.args, data[0])
		})
	}
}
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/iawia002/annie/extractors/pixivision"
"github.com/iawia002/annie/extractors/pornhub"
"github.com/iawia002/annie/extractors/qq"
"github.com/iawia002/annie/extractors/tangdou"
"github.com/iawia002/annie/extractors/tumblr"
"github.com/iawia002/annie/extractors/twitter"
"github.com/iawia002/annie/extractors/universal"
Expand Down Expand Up @@ -130,6 +131,8 @@ func download(videoURL string) bool {
data, err = iqiyi.Extract(videoURL)
case "mgtv":
data, err = mgtv.Extract(videoURL)
case "tangdou":
data, err = tangdou.Extract(videoURL)
case "tumblr":
data, err = tumblr.Extract(videoURL)
case "vimeo":
Expand Down

0 comments on commit 0283218

Please sign in to comment.