diff --git a/CHANGELOG b/CHANGELOG
index aab3e83c..23f53d0c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,38 @@
Instagram PHP Scraper Change Log
================================
+Version v0.4.5
+--------------
+- Enh: BIG thing. Get the best possible resolutions for medias
+
+Version v0.4.4
+--------------
+- Bug: Image urls broken for some medias in getMediaByCode()
+
+Version v0.4.3
+--------------
+- Enh: Make methods getContentsFromUrl() and generateRandomString() static
+
+Version v0.4.2
+--------------
+- Bug: Fix 405 error with fetching accounts by id
+- Bug: Fix 405 error with fetching comments by id
+
+Version v0.4.1
+--------------
+- Enh: URL-encode endpoints to support, for example, the Japanese language
+
+Version v0.4.0
+--------------
+- Enh: New methods to paginate medias getPaginateMedias() and getPaginateMediasByTag()
+
+Version v0.3.5
+--------------
+- Enh: Convenient media pagination getMediasByTag()
+
+Version v0.3.4
+--------------
+- Bug: Fix media urls
+
Version v0.3.3
--------------
- Bug: Include class Location and Comment in InstagramScraper.php
diff --git a/README.md b/README.md
index 89dc902b..712aec70 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,9 @@ echo $account->username;
### Search users by username
```php
-$medias = Instagram::searchAccountsByUsername('durov');
+$users = Instagram::searchAccountsByUsername('durov');
echo '
';
-echo json_encode($medias);
+echo json_encode($users);
echo '
';
```
@@ -56,9 +56,20 @@ Available properties:
$imageStandardResolutionUrl;
$imageHighResolutionUrl;
$caption;
+ $captionIsEdited;
+ $isAd;
$videoLowResolutionUrl;
$videoStandardResolutionUrl;
$videoLowBandwidthUrl;
+ $videoViews;
+ $code;
+ $owner;
+ $ownerId;
+ $likesCount;
+ $locationId;
+ $locationName;
+ $commentsCount;
+
*/
echo $medias[0]->imageHighResolutionUrl;
echo $medias[0]->caption;
@@ -163,4 +174,7 @@ $medias = Instagram::getLocationTopMediasById(1);
### Get location medias by location id
```php
$medias = Instagram::getLocationMediasById(1);
-```
\ No newline at end of file
+```
+
+### Other
+Java library: https://github.com/postaddictme/instagram-java-scraper
\ No newline at end of file
diff --git a/index.php b/index.php
index 2a6424f9..2f342aaf 100644
--- a/index.php
+++ b/index.php
@@ -3,13 +3,15 @@
require_once 'vendor/autoload.php';
require_once 'src/InstagramScraper.php';
-use InstagramScraper\Exception\InstagramException;
+
use InstagramScraper\Instagram;
-$instagram = new Instagram();
try {
- $medias = Instagram::getMedias('kevin', 1000);
- echo $medias[998]->imageThumbnailUrl;
+// $medias = Instagram::getMedias('kevin', 1497);
+// echo json_encode($medias[1497]);
+ $medias = Instagram::getMediasByTag('paveldurov', 300); // uses the alias imported by the `use` line above
+ echo sizeof($medias) . "\n"; // double quotes: '\n' in single quotes would print a literal backslash-n
+// echo json_encode($medias);
} catch (\Exception $ex) {
print_r($ex);
}
diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php
index a6e90900..1347f360 100644
--- a/src/InstagramScraper/Endpoints.php
+++ b/src/InstagramScraper/Endpoints.php
@@ -13,11 +13,13 @@ class Endpoints
const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}';
const MEDIA_JSON_BY_TAG = 'https://www.instagram.com/explore/tags/{tag}/?__a=1&max_id={max_id}';
const GENERAL_SEARCH = 'https://www.instagram.com/web/search/topsearch/?query={query}';
- const ACCOUNT_JSON_INFO_BY_ID = 'https://www.instagram.com/query/?q=ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}';
- const LAST_COMMENTS_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
- const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
- const LAST_LIKES_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
+ const ACCOUNT_JSON_INFO_BY_ID = 'ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}';
+ const LAST_COMMENTS_BY_CODE = 'ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
+ const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
+ const LAST_LIKES_BY_CODE = 'ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}';
+ const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/';
+ const INSTAGRAM_CDN_URL = 'https://scontent.cdninstagram.com/';
public static function getAccountPageLink($username)
{
diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php
index 15b5894e..d83df454 100644
--- a/src/InstagramScraper/Instagram.php
+++ b/src/InstagramScraper/Instagram.php
@@ -34,18 +34,11 @@ public static function getAccount($username)
public static function getAccountById($id)
{
-
if (!is_numeric($id)) {
throw new \InvalidArgumentException('User id must be integer or integer wrapped in string');
}
- $response = Request::get(Endpoints::getAccountJsonInfoLinkByAccountId($id));
- if ($response->code === 404) {
- throw new InstagramNotFoundException('Account with given username does not exist.');
- }
- if ($response->code !== 200) {
- throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
- }
- $userArray = json_decode($response->raw_body, true);
+ $parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id);
+ $userArray = json_decode(self::getContentsFromUrl($parameters), true);
if ($userArray['status'] === 'fail') {
throw new InstagramException($userArray['message']);
}
@@ -55,6 +48,38 @@ public static function getAccountById($id)
return Account::fromAccountPage($userArray);
}
+ private static function getContentsFromUrl($parameters) // POSTs $parameters as the "q" form field to Endpoints::INSTAGRAM_QUERY_URL and returns the raw curl_exec() result
+ {
+ if (!function_exists('curl_init')) { // cURL extension is not available
+ return false; // no exception is raised; the callers visible in this diff pass the result straight to json_decode()
+ }
+ $random = self::generateRandomString(); // one token, sent below as both the csrftoken cookie and the X-Csrftoken header
+ $ch = curl_init();
+ curl_setopt($ch, CURLOPT_URL, Endpoints::INSTAGRAM_QUERY_URL);
+ curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // make curl_exec() return the response body instead of printing it
+ curl_setopt($ch, CURLOPT_POST, 1);
+ curl_setopt($ch, CURLOPT_POSTFIELDS, 'q=' . $parameters); // NOTE(review): $parameters is concatenated without URL-encoding here; confirm the Endpoints helpers already encode it
+ $headers = array();
+ $headers[] = "Cookie: csrftoken=$random;";
+ $headers[] = "X-Csrftoken: $random";
+ $headers[] = "Referer: https://www.instagram.com/";
+ curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
+ $output = curl_exec($ch); // response body as a string, or false when the transfer fails; the HTTP status code is not inspected
+ curl_close($ch);
+ return $output;
+ }
+
+ private static function generateRandomString($length = 10) // returns a string of $length characters drawn from 0-9, a-z and A-Z
+ {
+ $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
+ $charactersLength = strlen($characters);
+ $randomString = '';
+ for ($i = 0; $i < $length; $i++) {
+ $randomString .= $characters[rand(0, $charactersLength - 1)]; // rand() bounds are inclusive, so every character is reachable; rand() is not cryptographically secure
+ }
+ return $randomString;
+ }
+
public static function getMedias($username, $count = 20, $maxId = '')
{
$index = 0;
@@ -95,8 +120,8 @@ public static function getPaginateMedias($username, $maxId = '')
$medias = [];
$toReturn = [
- 'medias' => $medias,
- 'maxId' => $maxId,
+ 'medias' => $medias,
+ 'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];
@@ -124,8 +149,8 @@ public static function getPaginateMedias($username, $maxId = '')
$hasNextPage = $arr['more_available'];
$toReturn = [
- 'medias' => $medias,
- 'maxId' => $maxId,
+ 'medias' => $medias,
+ 'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];
@@ -160,6 +185,7 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '')
{
$index = 0;
$medias = [];
+ $mediaIds = [];
$hasNextPage = true;
while ($index < $count && $hasNextPage) {
$response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId));
@@ -179,7 +205,12 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '')
if ($index === $count) {
return $medias;
}
- $medias[] = Media::fromTagPage($mediaArray);
+ $media = Media::fromTagPage($mediaArray);
+ if (in_array($media->id, $mediaIds)) {
+ return $medias;
+ }
+ $mediaIds[] = $media->id;
+ $medias[] = $media;
$index++;
}
if (count($nodes) == 0) {
@@ -197,8 +228,8 @@ public static function getPaginateMediasByTag($tag, $maxId = '')
$medias = [];
$toReturn = [
- 'medias' => $medias,
- 'maxId' => $maxId,
+ 'medias' => $medias,
+ 'maxId' => $maxId,
'hasNextPage' => $hasNextPage
];
@@ -230,11 +261,13 @@ public static function getPaginateMediasByTag($tag, $maxId = '')
$maxId = $arr['tag']['media']['page_info']['end_cursor'];
$hasNextPage = $arr['tag']['media']['page_info']['has_next_page'];
+ $count = $arr['tag']['media']['count'];
$toReturn = [
- 'medias' => $medias,
- 'maxId' => $maxId,
- 'hasNextPage' => $hasNextPage
+ 'medias' => $medias,
+ 'count' => $count,
+ 'maxId' => $maxId,
+ 'hasNextPage' => $hasNextPage,
];
return $toReturn;
@@ -336,17 +369,12 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null)
$remain = 0;
}
if (!isset($maxId)) {
- $response = Request::get(Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive));
+ $parameters = Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive);
+
} else {
- $response = Request::get(Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId));
+ $parameters = Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId);
}
- if ($response->code === 404) {
- throw new InstagramNotFoundException('Account with given username does not exist.');
- }
- if ($response->code !== 200) {
- throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
- }
- $jsonResponse = json_decode($response->raw_body, true);
+ $jsonResponse = json_decode(self::getContentsFromUrl($parameters), true);
$nodes = $jsonResponse['comments']['nodes'];
foreach ($nodes as $commentArray) {
$comments[] = Comment::fromApi($commentArray);
@@ -422,21 +450,4 @@ public static function getLocationById($facebookLocationId)
$jsonResponse = json_decode($response->raw_body, true);
return Location::makeLocation($jsonResponse['location']);
}
-
- public static function getLastLikesByCode($code)
- {
- $response = Request::get(Endpoints::getLastLikesByCodeLink($code));
- if ($response->code === 404) {
- throw new InstagramNotFoundException('Media with this shortcode doesn\'t exist');
- }
- if ($response->code !== 200) {
- throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.');
- }
- $jsonResponse = json_decode($response->raw_body, true);
- $users = [];
- foreach ($jsonResponse['likes']['nodes'] as $userArray) {
- $users[] = Account::fromAccountPage($userArray['user']);
- }
- return $users;
- }
}
\ No newline at end of file
diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php
index de47cda7..089f0427 100644
--- a/src/InstagramScraper/Model/Media.php
+++ b/src/InstagramScraper/Model/Media.php
@@ -43,10 +43,11 @@ public static function fromApi($mediaArray)
$instance->link = $mediaArray['link'];
$instance->commentsCount = $mediaArray['comments']['count'];
$instance->likesCount = $mediaArray['likes']['count'];
- $instance->imageLowResolutionUrl = self::getCleanImageUrl($mediaArray['images']['low_resolution']['url']);
- $instance->imageThumbnailUrl = self::getCleanImageUrl($mediaArray['images']['thumbnail']['url']);
- $instance->imageStandardResolutionUrl = self::getCleanImageUrl($mediaArray['images']['standard_resolution']['url']);
- $instance->imageHighResolutionUrl = str_replace('320x320', '1080x1080', $instance->imageLowResolutionUrl);
+ $images = self::getImageUrls($mediaArray['images']['standard_resolution']['url']);
+ $instance->imageLowResolutionUrl = $images['low'];
+ $instance->imageThumbnailUrl = $images['thumbnail'];
+ $instance->imageStandardResolutionUrl = $images['standard'];
+ $instance->imageHighResolutionUrl = $images['high'];
if (isset($mediaArray['caption'])) {
$instance->caption = $mediaArray['caption']['text'];
}
@@ -54,9 +55,11 @@ public static function fromApi($mediaArray)
if (isset($mediaArray['video_views'])) {
$instance->videoViews = $mediaArray['video_views'];
}
- $instance->videoLowResolutionUrl = $mediaArray['videos']['low_resolution']['url'];
- $instance->videoStandardResolutionUrl = $mediaArray['videos']['standard_resolution']['url'];
- $instance->videoLowBandwidthUrl = $mediaArray['videos']['low_bandwidth']['url'];
+ if (isset($mediaArray['videos'])) {
+ $instance->videoLowResolutionUrl = $mediaArray['videos']['low_resolution']['url'];
+ $instance->videoStandardResolutionUrl = $mediaArray['videos']['standard_resolution']['url'];
+ $instance->videoLowBandwidthUrl = $mediaArray['videos']['low_bandwidth']['url'];
+ }
}
if (isset($mediaArray['location']['id'])) {
$instance->locationId = $mediaArray['location']['id'];
@@ -67,9 +70,17 @@ public static function fromApi($mediaArray)
return $instance;
}
- private static function getCleanImageUrl($imageUrl)
+ private static function getImageUrls($imageUrl) // builds thumbnail/low/standard/high CDN URLs from the file name of a single image URL
{
- return strpos($imageUrl, '?ig_cache_key=') ? substr($imageUrl, 0, strpos($imageUrl, '?ig_cache_key=')) : $imageUrl;
+ $parts = explode('/', parse_url($imageUrl)['path']); // path component only, so the host and any query string are discarded
+ $imageName = $parts[sizeof($parts) - 1]; // last path segment, i.e. the image file name
+ $urls = [
+ 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName,
+ 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName,
+ 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName,
+ 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName // no size segment in the path; presumably serves the largest available rendition (verify against the CDN)
+ ];
+ return $urls;
}
public static function fromMediaPage($mediaArray)
@@ -111,29 +122,6 @@ public static function fromMediaPage($mediaArray)
return $instance;
}
- private static function getImageUrls($imageUrl)
- {
- $imageUrl = self::getCleanImageUrl($imageUrl);
- $parts = explode('/', parse_url($imageUrl)['path']);
- if (sizeof($parts) == 4) {
- $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/s640x640/' . $parts[2] . '/' . $parts[3];
- } else {
- if (isset($parts[4]) && $parts[4][0] == 'p') {
- $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/p640x640/' . $parts[3] . '/' . $parts[4];
- } else {
- $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/s640x640/' . $parts[3] . '/' . $parts[4];
- }
- }
-
- $urls = [
- 'standard' => $standard,
- 'low' => str_replace('640x640', '320x320', $standard),
- 'high' => str_replace('640x640', '1080x1080', $standard),
- 'thumbnail' => str_replace('640x640', '150x150', $standard)
- ];
- return $urls;
- }
-
public static function fromTagPage($mediaArray)
{
$instance = new self();
@@ -146,8 +134,11 @@ public static function fromTagPage($mediaArray)
$instance->caption = $mediaArray['caption'];
}
$instance->createdTime = $mediaArray['date'];
- $instance->imageThumbnailUrl = self::getCleanImageUrl($mediaArray['thumbnail_src']);
- $instance->imageStandardResolutionUrl = self::getCleanImageUrl($mediaArray['display_src']);
+ $images = self::getImageUrls($mediaArray['display_src']);
+ $instance->imageStandardResolutionUrl = $images['standard'];
+ $instance->imageLowResolutionUrl = $images['low'];
+ $instance->imageHighResolutionUrl = $images['high'];
+ $instance->imageThumbnailUrl = $images['thumbnail'];
$instance->type = 'image';
if ($mediaArray['is_video']) {
$instance->type = 'video';
@@ -159,21 +150,13 @@ public static function fromTagPage($mediaArray)
public static function getIdFromCode($code)
{
- $alphabet = [
- '-' => 62, '1' => 53, '0' => 52, '3' => 55, '2' => 54, '5' => 57, '4' => 56, '7' => 59, '6' => 58, '9' => 61,
- '8' => 60, 'A' => 0, 'C' => 2, 'B' => 1, 'E' => 4, 'D' => 3, 'G' => 6, 'F' => 5, 'I' => 8, 'H' => 7,
- 'K' => 10, 'J' => 9, 'M' => 12, 'L' => 11, 'O' => 14, 'N' => 13, 'Q' => 16, 'P' => 15, 'S' => 18, 'R' => 17,
- 'U' => 20, 'T' => 19, 'W' => 22, 'V' => 21, 'Y' => 24, 'X' => 23, 'Z' => 25, '_' => 63, 'a' => 26, 'c' => 28,
- 'b' => 27, 'e' => 30, 'd' => 29, 'g' => 32, 'f' => 31, 'i' => 34, 'h' => 33, 'k' => 36, 'j' => 35, 'm' => 38,
- 'l' => 37, 'o' => 40, 'n' => 39, 'q' => 42, 'p' => 41, 's' => 44, 'r' => 43, 'u' => 46, 't' => 45, 'w' => 48,
- 'v' => 47, 'y' => 50, 'x' => 49, 'z' => 51
- ];
- $n = 0;
+ $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'; // a character's position in this string is its digit value (0-63); same alphabet as getCodeFromId()
+ $id = 0;
for ($i = 0; $i < strlen($code); $i++) {
$c = $code[$i];
- $n = $n * 64 + $alphabet[$c];
+ $id = $id * 64 + strpos($alphabet, $c); // base-64 accumulation, most significant character first; strpos() yields false (added as 0) for a character outside the alphabet
}
- return $n;
+ return $id;
}
public static function getLinkFromId($id)
@@ -187,12 +170,12 @@ public static function getCodeFromId($id)
$parts = explode('_', $id);
$id = $parts[0];
$alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_';
- $shortenedId = '';
+ $code = '';
while ($id > 0) {
$remainder = $id % 64;
$id = ($id - $remainder) / 64;
- $shortenedId = $alphabet{$remainder} . $shortenedId;
+ $code = $alphabet{$remainder} . $code;
};
- return $shortenedId;
+ return $code;
}
-}
+}
\ No newline at end of file
diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php
index 3e840bdc..fa13e404 100644
--- a/tests/InstagramTest.php
+++ b/tests/InstagramTest.php
@@ -3,6 +3,7 @@
require '../vendor/autoload.php';
use InstagramScraper\Instagram;
+use InstagramScraper\Model\Media;
use PHPUnit\Framework\TestCase;
@@ -63,4 +64,20 @@ public function testGetLocationById()
$location = Instagram::getLocationById(1);
$this->assertEquals('Dog Patch Labs', $location->name);
}
+
+ public function testGetCodeFromId() // id -> shortcode, accepting a numeric string, an "id_ownerId" string and an integer
+ {
+ $code = Media::getCodeFromId('1270593720437182847');
+ $this->assertEquals('BGiDkHAgBF_', $code);
+ $code = Media::getCodeFromId('1270593720437182847_3');
+ $this->assertEquals('BGiDkHAgBF_', $code);
+ $code = Media::getCodeFromId(1270593720437182847);
+ $this->assertEquals('BGiDkHAgBF_', $code);
+ }
+
+ public function testGetIdFromCode() // shortcode -> numeric id
+ {
+ $id = Media::getIdFromCode('BGiDkHAgBF_');
+ $this->assertEquals(1270593720437182847, $id);
+ }
}
\ No newline at end of file