-
Notifications
You must be signed in to change notification settings - Fork 0
/
instagram_account.js
89 lines (75 loc) · 3.43 KB
/
instagram_account.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Phantombuster configuration {
"phantombuster command: nodejs"
"phantombuster package: 5"
"phantombuster flags: save-folder"
"phantombuster dependencies: lib-Sponsored.js, lib-Mattr-Helper.js"
const Buster = require("phantombuster")
const buster = new Buster()
const Nick = require("nickjs")
const nick = new Nick()
const _ = require("lodash")
const Sponsored = require("./lib-Sponsored")
const MattrHelper = require('./lib-Mattr-Helper');
// }
// Step 1 of the agent: open the requested Instagram profile in a new tab and pull the raw
// account data out of the page. Resolves with `{ account, mediaEdges }` for the next step
// of the chain; anything thrown here rejects the chain and ends up in the final `.catch`.
nick.newTab().then(async (tab) => {
  const mattrHelper = new MattrHelper(buster, nick, tab);
  const { handle } = buster.argument || {};

  // Fail fast on a missing or blank handle. Interpolating it unchecked would request
  // https://www.instagram.com/undefined (or the site root) and the run would still
  // "succeed" with an account made of nulls.
  const profileHandle = handle == null ? "" : String(handle).trim();
  if (profileHandle === "") {
    throw new Error('Missing required argument "handle" (the Instagram username to scrape)');
  }

  await mattrHelper.openTab(`https://www.instagram.com/${profileHandle}`);

  // Only wait for the React root container. A stricter selector targeting the profile
  // header <h1> was used here previously and has been dropped in favour of this one.
  await tab.untilVisible("#react-root");

  // NOTE(review): jQuery is injected but nothing in this script references it; it is kept
  // so the page environment stays exactly as before. Confirm before removing.
  await tab.inject("../injectables/jquery-3.0.0.min.js");
  // lodash is what the in-page function below uses (as `_`) to walk the graphql structure.
  await tab.inject("../injectables/lodash-full-4.13.1.min.js");

  return await tab.evaluate((arg, callback) => {
    // Here we're in the page context. It's like being in your browser's inspector tool,
    // so `_` refers to the lodash build injected above.
    // At the moment, Instagram creates a global js object called _sharedData which
    // contains the graphql edge data. Every lookup falls back to a default, so a page
    // without that object yields an account of nulls and an empty media list.
    const userEdge = _.get(window._sharedData || {}, 'entry_data.ProfilePage[0].graphql.user', {});
    const mediaEdges = _.get(userEdge, 'edge_owner_to_timeline_media.edges', []);
    const account = {
      id: _.get(userEdge, 'id', null),
      username: _.get(userEdge, 'username', null),
      name: _.get(userEdge, 'full_name', null),
      bio: _.get(userEdge, 'biography', null),
      postCount: _.get(userEdge, 'edge_owner_to_timeline_media.count', null),
      followerCount: _.get(userEdge, 'edge_followed_by.count', null),
      followCount: _.get(userEdge, 'edge_follow.count', null),
      isPrivate: _.get(userEdge, 'is_private', null),
      isVerified: _.get(userEdge, 'is_verified', null),
      avatar: _.get(userEdge, 'profile_pic_url_hd', null),
    };
    // Node-style callback: first argument is the error (none), second is the result.
    callback(null, { account, mediaEdges });
  });
})
.then(async ({ account, mediaEdges }) => {
console.log('Performing data mapping...');
const medias = [];
_.forEach(mediaEdges, function(_mediaEdge) {
const mediaEdge = _.get(_mediaEdge, 'node', {});
const mediaData = {
id: _.get(mediaEdge, 'id', null),
shortcode: _.get(mediaEdge, 'shortcode', null),
caption: _.get(mediaEdge, 'edge_media_to_caption.edges[0].node.text', null),
likesCount: _.get(mediaEdge, 'edge_media_preview_like.count', null),
viewsCount: _.get(mediaEdge, 'video_view_count', null),
commentsCount: _.get(mediaEdge, 'edge_media_to_comment.count', null),
mediaType: _.get(mediaEdge, 'is_video', false) ? 'video' : 'image',
imageUrl: _.get(mediaEdge, 'display_url', null),
videoUrl: _.get(mediaEdge, 'video_url', null),
createdAtTime: _.get(mediaEdge, 'taken_at_timestamp', null),
owner: { ...account },
isSponsored: Sponsored.isSponsoredMedia(mediaEdge),
};
medias.push(mediaData);
});
account.medias = medias;
console.log('Data mapping complete!');
await buster.setResultObject(account);
})
.then(() => {
console.log("Job done!");
nick.exit();
})
.catch((err) => {
console.log(`Something went wrong: ${err}`);
nick.exit(1);
});