-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Pinterest video downloading feature (#1253)
* Added Pinterest video downloading feature * Update pinterest.go Fixed title extractor
- Loading branch information
1 parent
c17006e
commit 3da4af3
Showing
5 changed files
with
157 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# CI workflow that runs the pinterest extractor tests.
name: pinterest

on:
  push:
    paths:
      - "extractors/pinterest/*.go"
      - ".github/workflows/stream_pinterest.yml"
  pull_request:
    paths:
      # Must match the pinterest extractor sources so pull requests that
      # touch them actually trigger this workflow.
      - "extractors/pinterest/*.go"
      - ".github/workflows/stream_pinterest.yml"
  schedule:
    # run ci weekly
    - cron: "0 0 * * 0"

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        go: ["1.20"]
        os: [ubuntu-latest]
    name: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-go@v2
        with:
          go-version: ${{ matrix.go }}

      - name: Test
        run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/pinterest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package pinterest | ||
|
||
import ( | ||
"regexp" | ||
"strings" | ||
|
||
"github.com/pkg/errors" | ||
|
||
"github.com/iawia002/lux/extractors" | ||
"github.com/iawia002/lux/request" | ||
) | ||
|
||
func init() { | ||
extractors.Register("pinterest", New()) | ||
} | ||
|
||
// extractor is the pinterest implementation returned by New. It has no
// fields, so it carries no per-instance state.
type extractor struct{}
|
||
// New returns a pinterest extractor. | ||
func New() extractors.Extractor { | ||
return &extractor{} | ||
} | ||
|
||
// Extract is the main function to extract the data. | ||
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) { | ||
html, err := request.Get(url, url, map[string]string{ | ||
// pinterest require a user agent | ||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:98.0) Gecko/20100101 Firefox/98.0", | ||
}) | ||
if err != nil { | ||
return nil, errors.WithStack(err) | ||
} | ||
|
||
urlMatcherRegExp := regexp.MustCompile(`"contentUrl":"https:\/\/v1\.pinimg\.com\/videos\/mc\/720p\/[a-zA-Z0-9\/]+\.mp4`) | ||
|
||
downloadURLMatcher := urlMatcherRegExp.FindStringSubmatch(html) | ||
|
||
if len(downloadURLMatcher) == 0 { | ||
return nil, errors.WithStack(extractors.ErrURLParseFailed) | ||
} | ||
|
||
videoURL := strings.ReplaceAll(downloadURLMatcher[0], `"contentUrl":"`, "") | ||
|
||
titleMatcherRegExp := regexp.MustCompile(`<title[^>]*>([^<]+)</title>`) | ||
|
||
titleMatcher := titleMatcherRegExp.FindStringSubmatch(html) | ||
|
||
if len(titleMatcher) == 0 { | ||
return nil, errors.WithStack(extractors.ErrURLParseFailed) | ||
} | ||
|
||
title := strings.ReplaceAll(strings.ReplaceAll(titleMatcher[0], "<title>", ""), "</title>", "") | ||
|
||
titleArr := strings.Split(title, "|") | ||
|
||
if len(titleArr) > 0 { | ||
title = titleArr[0] | ||
} | ||
|
||
streams := make(map[string]*extractors.Stream) | ||
|
||
size, err := request.Size(videoURL, url) | ||
if err != nil { | ||
return nil, errors.WithStack(err) | ||
} | ||
urlData := &extractors.Part{ | ||
URL: videoURL, | ||
Size: size, | ||
Ext: "mp4", | ||
} | ||
streams["default"] = &extractors.Stream{ | ||
Parts: []*extractors.Part{urlData}, | ||
Size: size, | ||
} | ||
|
||
return []*extractors.Data{ | ||
{ | ||
Site: "Pinterest pinterest.com", | ||
Title: title, | ||
Type: extractors.DataTypeVideo, | ||
Streams: streams, | ||
URL: url, | ||
}, | ||
}, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package pinterest | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/iawia002/lux/extractors" | ||
"github.com/iawia002/lux/test" | ||
) | ||
|
||
func TestDownload(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
args test.Args | ||
}{ | ||
{ | ||
name: "normal test 1", | ||
args: test.Args{ | ||
URL: "https://www.pinterest.com/pin/creamy-cheesy-pretzel-bites-video--368450813272292084/", | ||
Title: "Creamy Cheesy Pretzel Bites [Video] ", | ||
Size: 30247497, | ||
}, | ||
}, | ||
{ | ||
name: "normal test 2", | ||
args: test.Args{ | ||
URL: "https://www.pinterest.com/pin/532198880988430823/", | ||
Title: "Pin on TikTok ~ The world of food", | ||
Size: 4676927, | ||
}, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
data, err := New().Extract(tt.args.URL, extractors.Options{}) | ||
test.CheckError(t, err) | ||
test.Check(t, tt.args, data[0]) | ||
}) | ||
} | ||
} |