From c4267b2c70eb96c82b22eeeb536fa7daa9501ecc Mon Sep 17 00:00:00 2001 From: romanWork Date: Wed, 14 Mar 2018 14:40:48 +0300 Subject: [PATCH 01/24] fix getAccountById --- src/InstagramScraper/Instagram.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 159b6833..b14e21b0 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -589,10 +589,10 @@ public function getAccount($username) } $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); - if (!isset($userArray['user'])) { + if (!isset($userArray['graphql']['user'])) { throw new InstagramException('Account with this username does not exist'); } - return Account::create($userArray['user']); + return Account::create($userArray['graphql']['user']); } /** From 821914fc5f5e72110b5042626d186c0a97657ebf Mon Sep 17 00:00:00 2001 From: romanWork Date: Wed, 14 Mar 2018 19:07:34 +0300 Subject: [PATCH 02/24] fix getMedias after Instagram changed API --- src/InstagramScraper/Endpoints.php | 10 +++++ src/InstagramScraper/Instagram.php | 64 ++++++++++++++++++++++-------- tests/InstagramNoAuthTest.php | 25 ++++++++++++ tests/InstagramTest.php | 12 ++++++ 4 files changed, 94 insertions(+), 17 deletions(-) create mode 100644 tests/InstagramNoAuthTest.php diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 75fe6325..1ec7b885 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -9,6 +9,9 @@ class Endpoints const ACCOUNT_PAGE = 'https://www.instagram.com/{username}'; const MEDIA_LINK = 'https://www.instagram.com/p/{code}'; const ACCOUNT_MEDIAS = 'https://www.instagram.com/{username}/?__a=1&max_id={max_id}'; + const ACCOUNT_MEDIAS_BY_USER_ID = 'https://www.instagram.com/graphql/query/?query_hash=472f257a40c653c64c666ce877d59d2b&variables={"id":"{id}","first":{count},"after":"{end_cursor}"}'; + + const ACCOUNT_JSON_INFO = 'https://www.instagram.com/{username}/?__a=1'; const MEDIA_JSON_INFO = 'https://www.instagram.com/p/{code}/?__a=1'; const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}'; @@ -54,6 +57,13 @@ public static function getAccountMediasJsonLink($username, $maxId = '') $url = str_replace('{username}', urlencode($username), static::ACCOUNT_MEDIAS); return str_replace('{max_id}', urlencode($maxId), $url); } + + public static function getAccountMediasByUserIdJsonLink($id, $count=12, $cursor = '') + { + $url = str_replace('{id}', urlencode($id), static::ACCOUNT_MEDIAS_BY_USER_ID); + $url = str_replace('{count}', urlencode($count), $url); + return str_replace('{end_cursor}', urlencode($cursor), $url); + } public static function getMediaPageLink($code) { diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index b14e21b0..5811cfd3 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -220,21 +220,37 @@ private function generateHeaders($session) * @throws InstagramException */ public function getMedias($username, $count = 20, $maxId = '') + { + $account = $this->getAccount($username); + return $this->getMediasByUserId($account->getId(), $count, $maxId); + } + + + /** + * @param int $id + * @param int $count + * @param string $maxId + * + * @return Media[] + * @throws InstagramException + */ + public function getMediasByUserId($id, $count = 20, $cursor = '') { $index = 0; $medias = []; $isMoreAvailable = true; while ($index < $count && $isMoreAvailable) { - $response = Request::get(Endpoints::getAccountMediasJsonLink($username, $maxId), $this->generateHeaders($this->userSession)); + $response = Request::get(Endpoints::getAccountMediasByUserIdJsonLink($id, $count, $cursor)); + if (static::HTTP_OK !== $response->code) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } - + $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); if (!is_array($arr)) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } - $nodes = $arr['user']['media']['nodes']; + $nodes = $arr['data']['user']['edge_owner_to_timeline_media']['edges']; // fix - count takes longer/has more overhead if (!isset($nodes) || empty($nodes)) { return []; @@ -243,18 +259,18 @@ public function getMedias($username, $count = 20, $maxId = '') if ($index === $count) { return $medias; } - $medias[] = Media::create($mediaArray); + $medias[] = Media::create($mediaArray['node']); $index++; } if (empty($nodes) || !isset($nodes)) { return $medias; } - $maxId = $nodes[count($nodes) - 1]['id']; - $isMoreAvailable = $arr['user']['media']['page_info']['has_next_page']; + $cursor = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']; + $isMoreAvailable = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']; } return $medias; } - + /** * @param $mediaId * @@ -537,40 +553,54 @@ public function getMediaLikesByCode($code, $count = 10, $maxId = null) * @throws \InvalidArgumentException */ public function getAccountById($id) + { + $username = $this->getUsernameById($id); + return $this->getAccount($username); + } + + + /** + * @param string $id + * + * @return string + * @throws InstagramException + * @throws \InvalidArgumentException + */ + public function getUsernameById($id) { // Use the follow page to get the account. The follow url will redirect to the home page for the user, // which has the username embedded in the url. - + if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } - + $url = Endpoints::getFollowUrl($id); - + // Cut a request by disabling redirects. Request::curlOpt(CURLOPT_FOLLOWLOCATION, FALSE); $response = Request::get($url, $this->generateHeaders($this->userSession)); Request::curlOpt(CURLOPT_FOLLOWLOCATION, TRUE); - + if ($response->code === 400) { throw new InstagramException('Account with this id does not exist.'); } - + if ($response->code !== 302) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->raw_body) . ' Something went wrong. Please report issue.'); } - + $cookies = static::parseCookies($response->headers['Set-Cookie']); $this->userSession['csrftoken'] = $cookies['csrftoken']; - + // Get the username from the response url. $responseUrl = $response->headers['Location']; $urlParts = explode('/', rtrim($responseUrl, '/')); $username = end($urlParts); - - return $this->getAccount($username); + + return $username; } - + /** * @param string $username * diff --git a/tests/InstagramNoAuthTest.php b/tests/InstagramNoAuthTest.php new file mode 100644 index 00000000..e69aa636 --- /dev/null +++ b/tests/InstagramNoAuthTest.php @@ -0,0 +1,25 @@ +getMediasByUserId(3); + $this->assertEquals(20, count($nonPrivateAccountMedias)); + } + +} \ No newline at end of file diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index b28698ac..942c326a 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -31,6 +31,9 @@ public function testGetAccountByUsername() $this->assertEquals('3', $account->getId()); } + /** + * @group getAccountById + */ public function testGetAccountById() { @@ -122,6 +125,15 @@ public function testGeMediaCommentsByCode() //TODO: check why returns less comments $this->assertEquals(32, sizeof($comments)); } + + /** + * @group getUsernameById + */ + public function testGetUsernameById() + { + $username = self::$instagram->getUsernameById(3); + $this->assertEquals('kevin', $username); + } // TODO: Add test getMediaById // TODO: Add test getLocationById From 7c95ea2f96bddde89f5722ce3c76bf0a1c4f4b56 Mon Sep 17 00:00:00 2001 From: romanWork Date: Mon, 19 Mar 2018 13:53:12 +0300 Subject: [PATCH 03/24] fix spaces --- src/InstagramScraper/Instagram.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 38e967c3..1d757c13 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -571,37 +571,37 @@ public function getUsernameById($id) { // Use the follow page to get the account. The follow url will redirect to the home page for the user, // which has the username embedded in the url. - + if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } - + $url = Endpoints::getFollowUrl($id); - + // Cut a request by disabling redirects. Request::curlOpt(CURLOPT_FOLLOWLOCATION, FALSE); $response = Request::get($url, $this->generateHeaders($this->userSession)); Request::curlOpt(CURLOPT_FOLLOWLOCATION, TRUE); - + if ($response->code === 400) { throw new InstagramException('Account with this id does not exist.'); } - + if ($response->code !== 302) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->raw_body) . ' Something went wrong. Please report issue.'); } - + $cookies = static::parseCookies($response->headers['Set-Cookie']); $this->userSession['csrftoken'] = $cookies['csrftoken']; - + // Get the username from the response url. $responseUrl = $response->headers['Location']; $urlParts = explode('/', rtrim($responseUrl, '/')); $username = end($urlParts); - + return $username; } - + /** * @param string $username * From ec6fee02bcd5cf3f41f45316db080247a467fa23 Mon Sep 17 00:00:00 2001 From: romanWork Date: Mon, 19 Mar 2018 13:57:38 +0300 Subject: [PATCH 04/24] delete unused code --- src/InstagramScraper/Endpoints.php | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 8418b0e3..05501808 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -55,13 +55,6 @@ public static function getAccountMediasJsonLink($userId, $maxId = '') return str_replace('{max_id}', urlencode($maxId), $url); } - public static function getAccountMediasByUserIdJsonLink($id, $count=12, $cursor = '') - { - $url = str_replace('{id}', urlencode($id), static::ACCOUNT_MEDIAS_BY_USER_ID); - $url = str_replace('{count}', urlencode($count), $url); - return str_replace('{end_cursor}', urlencode($cursor), $url); - } - public static function getMediaPageLink($code) { return str_replace('{code}', urlencode($code), static::MEDIA_LINK); From 4ab8f1953f103876bd8e623423d8f2d5de472f6c Mon Sep 17 00:00:00 2001 From: romanWork Date: Mon, 19 Mar 2018 13:58:59 +0300 Subject: [PATCH 05/24] delete space --- src/InstagramScraper/Endpoints.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 05501808..f8c88e77 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -54,7 +54,7 @@ public static function getAccountMediasJsonLink($userId, $maxId = '') $url = str_replace('{user_id}', urlencode($userId), static::ACCOUNT_MEDIAS); return str_replace('{max_id}', urlencode($maxId), $url); } - + public static function getMediaPageLink($code) { return str_replace('{code}', urlencode($code), static::MEDIA_LINK); From 483064dd1110a38fbb8e308059b4c02e738b9f06 Mon Sep 17 00:00:00 2001 From: Giedrius Kartanovic Date: Mon, 19 Mar 2018 13:45:35 +0000 Subject: [PATCH 06/24] Fix getPaginateMedias to return if there is next page --- src/InstagramScraper/Instagram.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index bc172455..5cb0813f 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -358,7 +358,7 @@ public function getPaginateMedias($username, $maxId = '') } $maxId = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']; - $isMoreAvailable = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']; + $hasNextPage = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']; $toReturn = [ 'medias' => $medias, From ab8b69f56c795f125f08bdc0d65e8eed393078bd Mon Sep 17 00:00:00 2001 From: Giedrius Kartanovic Date: Mon, 19 Mar 2018 14:57:01 +0000 Subject: [PATCH 07/24] Allow configuring how many media objects are retrieved in a single request for ACCOUNT_MEDIAS --- src/InstagramScraper/Endpoints.php | 13 ++++++++++++- src/InstagramScraper/Instagram.php | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index f8c88e77..95b49a87 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -8,7 +8,7 @@ class Endpoints const LOGIN_URL = 'https://www.instagram.com/accounts/login/ajax/'; const ACCOUNT_PAGE = 'https://www.instagram.com/{username}'; const MEDIA_LINK = 'https://www.instagram.com/p/{code}'; - const ACCOUNT_MEDIAS = 'https://instagram.com/graphql/query/?query_id=17888483320059182&id={user_id}&first=30&after={max_id}'; + const ACCOUNT_MEDIAS = 'https://instagram.com/graphql/query/?query_id=17888483320059182&id={user_id}&first={count}&after={max_id}'; const ACCOUNT_JSON_INFO = 'https://www.instagram.com/{username}/?__a=1'; const MEDIA_JSON_INFO = 'https://www.instagram.com/p/{code}/?__a=1'; const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}'; @@ -34,6 +34,16 @@ class Endpoints const GRAPH_QL_QUERY_URL = 'https://www.instagram.com/graphql/query/?query_id={{queryId}}'; + private static $requestMediaCount = 30; + + /** + * @param int $count + */ + public static function setAccountMediasRequestCount($count) + { + static::$requestMediaCount = $count; + } + public static function getAccountPageLink($username) { return str_replace('{username}', urlencode($username), static::ACCOUNT_PAGE); @@ -52,6 +62,7 @@ public static function getAccountJsonInfoLinkByAccountId($id) public static function getAccountMediasJsonLink($userId, $maxId = '') { $url = str_replace('{user_id}', urlencode($userId), static::ACCOUNT_MEDIAS); + $url = str_replace('{count}', static::$requestMediaCount, $url); return str_replace('{max_id}', urlencode($maxId), $url); } diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 5cb0813f..a6cf1d75 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -100,6 +100,15 @@ public static function searchTagsByTagName($tag) return $hashtags; } + /** + * Set how many media objects should be retrieved in a single request + * @param int $count + */ + public static function setAccountMediasRequestCount($count) + { + Endpoints::setAccountMediasRequestCount($count); + } + /** * @param \stdClass|string $rawError * From 540ea8da0a08ce8f68ae90e8ada9ca8138274146 Mon Sep 17 00:00:00 2001 From: Alexandr Date: Fri, 23 Mar 2018 15:02:13 +0700 Subject: [PATCH 08/24] Fix media CDN --- src/InstagramScraper/Model/Media.php | 49 +++++++++++++--------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 79506d4a..2086a21c 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -408,20 +408,33 @@ protected function initPropertiesCustom($value, $prop, $arr) case 'likes': $this->likesCount = $arr[$prop]['count']; break; - case 'images': - $images = self::getImageUrls($arr[$prop]['standard_resolution']['url']); - $this->imageLowResolutionUrl = $images['low']; - $this->imageThumbnailUrl = $images['thumbnail']; - $this->imageStandardResolutionUrl = $images['standard']; - $this->imageHighResolutionUrl = $images['high']; - break; case 'thumbnail_resources': - $thumbnailsUrl = []; foreach( $value as $thumbnail ) { - $thumbnailsUrl[] = $thumbnail['src']; + $thumbnailsUrl[] = $thumbnail['src']; + switch ($thumbnail['config_width']) { + case 150: + $this->imageThumbnailUrl = $thumbnail['src']; + break; + case 320: + $this->imageLowResolutionUrl = $thumbnail['src']; + break; + case 640: + $this->imageStandardResolutionUrl = $thumbnail['src']; + break; + default:; + } } $this->squareThumbnailsUrl = $thumbnailsUrl; break; + case 'display_url': + $this->imageHighResolutionUrl = $value; + break; + case 'display_src': + $this->imageHighResolutionUrl = $value; + if (!isset($this->type)) { + $this->type = static::TYPE_IMAGE; + } + break; case 'carousel_media': $this->type = self::TYPE_CAROUSEL; $this->carouselMedia = []; @@ -491,13 +504,6 @@ protected function initPropertiesCustom($value, $prop, $arr) case 'edge_liked_by': $this->likesCount = $arr[$prop]['count']; break; - case 'display_url': - $images = self::getImageUrls($arr[$prop]); - $this->imageStandardResolutionUrl = $images['standard']; - $this->imageLowResolutionUrl = $images['low']; - $this->imageHighResolutionUrl = $images['high']; - $this->imageThumbnailUrl = $images['thumbnail']; - break; case 'edge_media_to_caption': if (is_array($arr[$prop]['edges']) && !empty($arr[$prop]['edges'])) { $first_caption = $arr[$prop]['edges'][0]; @@ -512,7 +518,6 @@ protected function initPropertiesCustom($value, $prop, $arr) if (!is_array($arr[$prop]['edges'])) { break; } - foreach ($arr[$prop]['edges'] as $edge) { if (!isset($edge['node'])) { continue; @@ -527,16 +532,6 @@ protected function initPropertiesCustom($value, $prop, $arr) case 'date': $this->createdTime = (int)$value; break; - case 'display_src': - $images = static::getImageUrls($value); - $this->imageStandardResolutionUrl = $images['standard']; - $this->imageLowResolutionUrl = $images['low']; - $this->imageHighResolutionUrl = $images['high']; - $this->imageThumbnailUrl = $images['thumbnail']; - if (!isset($this->type)) { - $this->type = static::TYPE_IMAGE; - } - break; case '__typename': if ($value == 'GraphImage') { $this->type = static::TYPE_IMAGE; From e84ce2ad7b121fa6a583187b2967ac52b7a9cc94 Mon Sep 17 00:00:00 2001 From: ZipDriver Date: Fri, 23 Mar 2018 10:12:37 +0200 Subject: [PATCH 09/24] Update Instagram.php --- src/InstagramScraper/Instagram.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index bc172455..3d621258 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -591,7 +591,7 @@ public function getAccount($username) $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); if (!isset($userArray['graphql']['user'])) { - throw new InstagramException('Account with this username does not exist'); + throw new InstagramNotFoundException('Account with this username does not exist', 404); } return Account::create($userArray['graphql']['user']); } From 587af7aa1ce771f3f12219ef48ef03783798a260 Mon Sep 17 00:00:00 2001 From: raiym Date: Fri, 23 Mar 2018 12:03:23 +0300 Subject: [PATCH 10/24] typos --- src/InstagramScraper/Instagram.php | 365 +++++++++++++-------------- src/InstagramScraper/Model/Media.php | 84 +++--- 2 files changed, 223 insertions(+), 226 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 760e6763..44ecbb1f 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -27,15 +27,14 @@ class Instagram const PAGING_DELAY_MAXIMUM_MICROSEC = 3000000; // 3 sec max delay to simulate browser private static $instanceCache; + public $pagingTimeLimitSec = self::PAGING_TIME_LIMIT_SEC; + public $pagingDelayMinimumMicrosec = self::PAGING_DELAY_MINIMUM_MICROSEC; + public $pagingDelayMaximumMicrosec = self::PAGING_DELAY_MAXIMUM_MICROSEC; private $sessionUsername; private $sessionPassword; private $userSession; private $userAgent = null; - public $pagingTimeLimitSec = self::PAGING_TIME_LIMIT_SEC; - public $pagingDelayMinimumMicrosec = self::PAGING_DELAY_MINIMUM_MICROSEC; - public $pagingDelayMaximumMicrosec = self::PAGING_DELAY_MAXIMUM_MICROSEC; - /** * @param string $username * @param string $password @@ -100,15 +99,6 @@ public static function searchTagsByTagName($tag) return $hashtags; } - /** - * Set how many media objects should be retrieved in a single request - * @param int $count - */ - public static function setAccountMediasRequestCount($count) - { - Endpoints::setAccountMediasRequestCount($count); - } - /** * @param \stdClass|string $rawError * @@ -131,6 +121,54 @@ private static function getErrorBody($rawError) } + /** + * Set how many media objects should be retrieved in a single request + * @param int $count + */ + public static function setAccountMediasRequestCount($count) + { + Endpoints::setAccountMediasRequestCount($count); + } + + /** + * @param array $config + */ + public static function setProxy(array $config) + { + $defaultConfig = [ + 'port' => false, + 'tunnel' => false, + 'address' => false, + 'type' => CURLPROXY_HTTP, + 'timeout' => false, + 'auth' => [ + 'user' => '', + 'pass' => '', + 'method' => CURLAUTH_BASIC + ], + ]; + + $config = array_replace($defaultConfig, $config); + + Request::proxy($config['address'], $config['port'], $config['type'], $config['tunnel']); + + if (isset($config['auth'])) { + Request::proxyAuth($config['auth']['user'], $config['auth']['pass'], $config['auth']['method']); + } + + if (isset($config['timeout'])) { + Request::timeout((int)$config['timeout']); + } + } + + /** + * Disable proxy for all requests + */ + public static function disableProxy() + { + Request::proxy(''); + } + /** * @param string $username * @@ -164,60 +202,59 @@ public function searchAccountsByUsername($username) } /** - * @param $userAgent + * @param $session * - * @return string + * @return array */ - public function setUserAgent($userAgent) + private function generateHeaders($session) { - return $this->userAgent = $userAgent; + $headers = []; + if ($session) { + $cookies = ''; + foreach ($session as $key => $value) { + $cookies .= "$key=$value; "; + } + $headers = [ + 'cookie' => $cookies, + 'referer' => Endpoints::BASE_URL . '/', + 'x-csrftoken' => $session['csrftoken'], + ]; + } + + if ($this->getUserAgent()) { + $headers['user-agent'] = $this->getUserAgent(); + } + + return $headers; } /** - * @param $userAgent * - * @return null + * @return string */ - public function resetUserAgent($userAgent) + public function getUserAgent() { - return $this->userAgent = null; + return $this->userAgent; } /** + * @param $userAgent * * @return string */ - public function getUserAgent() + public function setUserAgent($userAgent) { - return $this->userAgent; + return $this->userAgent = $userAgent; } /** - * @param $session + * @param $userAgent * - * @return array + * @return null */ - private function generateHeaders($session) + public function resetUserAgent($userAgent) { - $headers = []; - if ($session) { - $cookies = ''; - foreach ($session as $key => $value) { - $cookies .= "$key=$value; "; - } - $headers = [ - 'cookie' => $cookies, - 'referer' => Endpoints::BASE_URL . '/', - 'x-csrftoken' => $session['csrftoken'], - ]; - } - - if($this->getUserAgent()) - { - $headers['user-agent'] = $this->getUserAgent(); - } - - return $headers; + return $this->userAgent = null; } /** @@ -235,7 +272,30 @@ public function getMedias($username, $count = 20, $maxId = '') return $this->getMediasByUserId($account->getId(), $count, $maxId); } - + /** + * @param string $username + * + * @return Account + * @throws InstagramException + * @throws InstagramNotFoundException + */ + public function getAccount($username) + { + $response = Request::get(Endpoints::getAccountJsonLink($username), $this->generateHeaders($this->userSession)); + if (static::HTTP_NOT_FOUND === $response->code) { + throw new InstagramNotFoundException('Account with given username does not exist.'); + } + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + + $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); + if (!isset($userArray['graphql']['user'])) { + throw new InstagramNotFoundException('Account with this username does not exist', 404); + } + return Account::create($userArray['graphql']['user']); + } + /** * @param int $id * @param int $count @@ -278,7 +338,6 @@ public function getMediasByUserId($id, $count = 20, $maxId = '') } return $medias; } - /** * @param $mediaId @@ -345,53 +404,53 @@ public function getMediaByCode($mediaCode) */ public function getPaginateMedias($username, $maxId = '') { - $account = $this->getAccount($username); - $hasNextPage = true; - $medias = []; - - $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, - 'hasNextPage' => $hasNextPage, - ]; - - $response = Request::get(Endpoints::getAccountMediasJsonLink($account->getId(), $maxId), - $this->generateHeaders($this->userSession)); - - // use a raw constant in the code is not a good idea!! - //if ($response->code !== 200) { - if (static::HTTP_OK !== $response->code) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); - } - - $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); - - if (!is_array($arr)) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); - } - $nodes = $arr['data']['user']['edge_owner_to_timeline_media']['edges']; - - //if (count($arr['items']) === 0) { - // I generally use empty. Im not sure why people would use count really - If the array is large then count takes longer/has more overhead. - // If you simply need to know whether or not the array is empty then use empty. - if (empty($nodes)) { - return $toReturn; - } - - foreach ($nodes as $mediaArray) { - $medias[] = Media::create($mediaArray['node']); - } - - $maxId = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']; + $account = $this->getAccount($username); + $hasNextPage = true; + $medias = []; + + $toReturn = [ + 'medias' => $medias, + 'maxId' => $maxId, + 'hasNextPage' => $hasNextPage, + ]; + + $response = Request::get(Endpoints::getAccountMediasJsonLink($account->getId(), $maxId), + $this->generateHeaders($this->userSession)); + + // use a raw constant in the code is not a good idea!! + //if ($response->code !== 200) { + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + + $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); + + if (!is_array($arr)) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + $nodes = $arr['data']['user']['edge_owner_to_timeline_media']['edges']; + + //if (count($arr['items']) === 0) { + // I generally use empty. Im not sure why people would use count really - If the array is large then count takes longer/has more overhead. + // If you simply need to know whether or not the array is empty then use empty. + if (empty($nodes)) { + return $toReturn; + } + + foreach ($nodes as $mediaArray) { + $medias[] = Media::create($mediaArray['node']); + } + + $maxId = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']; $hasNextPage = $arr['data']['user']['edge_owner_to_timeline_media']['page_info']['has_next_page']; - - $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, - 'hasNextPage' => $hasNextPage, - ]; - - return $toReturn; + + $toReturn = [ + 'medias' => $medias, + 'maxId' => $maxId, + 'hasNextPage' => $hasNextPage, + ]; + + return $toReturn; } /** @@ -568,7 +627,6 @@ public function getAccountById($id) return $this->getAccount($username); } - /** * @param string $id * @@ -611,30 +669,6 @@ public function getUsernameById($id) return $username; } - /** - * @param string $username - * - * @return Account - * @throws InstagramException - * @throws InstagramNotFoundException - */ - public function getAccount($username) - { - $response = Request::get(Endpoints::getAccountJsonLink($username), $this->generateHeaders($this->userSession)); - if (static::HTTP_NOT_FOUND === $response->code) { - throw new InstagramNotFoundException('Account with given username does not exist.'); - } - if (static::HTTP_OK !== $response->code) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); - } - - $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); - if (!isset($userArray['graphql']['user'])) { - throw new InstagramNotFoundException('Account with this username does not exist', 404); - } - return Account::create($userArray['graphql']['user']); - } - /** * @param string $tag * @param int $count @@ -774,7 +808,7 @@ public function getCurrentTopMediasByTagName($tagName) $this->userSession['csrftoken'] = $cookies['csrftoken']; $jsonResponse = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); $medias = []; - $nodes = (array) @$jsonResponse['graphql']['hashtag']['edge_hashtag_to_media']['edges']; + $nodes = (array)@$jsonResponse['graphql']['hashtag']['edge_hashtag_to_media']['edges']; foreach ($nodes as $mediaArray) { $medias[] = Media::create($mediaArray['node']); } @@ -1116,6 +1150,33 @@ public function login($force = false, $support_two_step_verification = false) return $this->generateHeaders($this->userSession); } + /** + * @param $session + * + * @return bool + */ + public function isLoggedIn($session) + { + if (is_null($session) || !isset($session['sessionid'])) { + return false; + } + $sessionId = $session['sessionid']; + $csrfToken = $session['csrftoken']; + $headers = ['cookie' => "csrftoken=$csrfToken; sessionid=$sessionId;", + 'referer' => Endpoints::BASE_URL . '/', + 'x-csrftoken' => $csrfToken, + ]; + $response = Request::get(Endpoints::BASE_URL, $headers); + if ($response->code !== 200) { + return false; + } + $cookies = static::parseCookies($response->headers['Set-Cookie']); + if (!isset($cookies['ds_user_id'])) { + return false; + } + return true; + } + private function verifyTwoStep($response, $cookies) { $new_cookies = static::parseCookies($response->headers['Set-Cookie']); @@ -1197,33 +1258,6 @@ private function verifyTwoStep($response, $cookies) return $response; } - /** - * @param $session - * - * @return bool - */ - public function isLoggedIn($session) - { - if (is_null($session) || !isset($session['sessionid'])) { - return false; - } - $sessionId = $session['sessionid']; - $csrfToken = $session['csrftoken']; - $headers = ['cookie' => "csrftoken=$csrfToken; sessionid=$sessionId;", - 'referer' => Endpoints::BASE_URL . '/', - 'x-csrftoken' => $csrfToken, - ]; - $response = Request::get(Endpoints::BASE_URL, $headers); - if ($response->code !== 200) { - return false; - } - $cookies = static::parseCookies($response->headers['Set-Cookie']); - if (!isset($cookies['ds_user_id'])) { - return false; - } - return true; - } - /** * */ @@ -1232,43 +1266,4 @@ public function saveSession() $cachedString = static::$instanceCache->getItem($this->sessionUsername); $cachedString->set($this->userSession); } - - /** - * @param array $config - */ - public static function setProxy(array $config) - { - $defaultConfig = [ - 'port' => false, - 'tunnel' => false, - 'address' => false, - 'type' => CURLPROXY_HTTP, - 'timeout' => false, - 'auth' => [ - 'user' => '', - 'pass' => '', - 'method' => CURLAUTH_BASIC - ], - ]; - - $config = array_replace($defaultConfig, $config); - - Request::proxy($config['address'], $config['port'], $config['type'], $config['tunnel']); - - if (isset($config['auth'])) { - Request::proxyAuth($config['auth']['user'], $config['auth']['pass'], $config['auth']['method']); - } - - if (isset($config['timeout'])) { - Request::timeout((int)$config['timeout']); - } - } - - /** - * Disable proxy for all requests - */ - public static function disableProxy() - { - Request::proxy(''); - } } diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 2086a21c..8b4bdd07 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -262,7 +262,8 @@ public function getImageHighResolutionUrl() /** * @return array */ - public function getSquareThumbnailsUrl() { + public function getSquareThumbnailsUrl() + { return $this->squareThumbnailsUrl; } @@ -409,32 +410,33 @@ protected function initPropertiesCustom($value, $prop, $arr) $this->likesCount = $arr[$prop]['count']; break; case 'thumbnail_resources': - foreach( $value as $thumbnail ) { - $thumbnailsUrl[] = $thumbnail['src']; - switch ($thumbnail['config_width']) { - case 150: - $this->imageThumbnailUrl = $thumbnail['src']; - break; - case 320: - $this->imageLowResolutionUrl = $thumbnail['src']; - break; - case 640: - $this->imageStandardResolutionUrl = $thumbnail['src']; - break; - default:; - } + foreach ($value as $thumbnail) { + $thumbnailsUrl[] = $thumbnail['src']; + switch ($thumbnail['config_width']) { + case 150: + $this->imageThumbnailUrl = $thumbnail['src']; + break; + case 320: + $this->imageLowResolutionUrl = $thumbnail['src']; + break; + case 640: + $this->imageStandardResolutionUrl = $thumbnail['src']; + break; + default: + ; + } } $this->squareThumbnailsUrl = $thumbnailsUrl; break; case 'display_url': - $this->imageHighResolutionUrl = $value; - break; + $this->imageHighResolutionUrl = $value; + break; case 'display_src': - $this->imageHighResolutionUrl = $value; - if (!isset($this->type)) { - $this->type = static::TYPE_IMAGE; - } - break; + $this->imageHighResolutionUrl = $value; + if (!isset($this->type)) { + $this->type = static::TYPE_IMAGE; + } + break; case 'carousel_media': $this->type = self::TYPE_CAROUSEL; $this->carouselMedia = []; @@ -502,7 +504,7 @@ protected function initPropertiesCustom($value, $prop, $arr) $this->likesCount = $arr[$prop]['count']; break; case 'edge_liked_by': - $this->likesCount = $arr[$prop]['count']; + $this->likesCount = $arr[$prop]['count']; break; case 'edge_media_to_caption': if (is_array($arr[$prop]['edges']) && !empty($arr[$prop]['edges'])) { @@ -547,24 +549,6 @@ protected function initPropertiesCustom($value, $prop, $arr) } } - /** - * @param string $imageUrl - * - * @return array - */ - private static function getImageUrls($imageUrl) - { - $parts = explode('/', parse_url($imageUrl)['path']); - $imageName = $parts[sizeof($parts) - 1]; - $urls = [ - 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName, - 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, - 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, - 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName, - ]; - return $urls; - } - /** * @param $mediaArray * @param $carouselArray @@ -599,6 +583,24 @@ private static function setCarouselMedia($mediaArray, $carouselArray, $instance) return $mediaArray; } + /** + * @param string $imageUrl + * + * @return array + */ + private static function getImageUrls($imageUrl) + { + $parts = explode('/', parse_url($imageUrl)['path']); + $imageName = $parts[sizeof($parts) - 1]; + $urls = [ + 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName, + 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, + 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, + 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName, + ]; + return $urls; + } + /** * @return Account */ From 042064f56a67d4e2e822f72598eef8f53001b8a4 Mon Sep 17 00:00:00 2001 From: Christophe Histaesse Date: Fri, 23 Mar 2018 11:16:21 +0100 Subject: [PATCH 11/24] Update Account.php user ID property is pk on search JSON result --- src/InstagramScraper/Model/Account.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/InstagramScraper/Model/Account.php b/src/InstagramScraper/Model/Account.php index 01f76e00..237b128f 100644 --- a/src/InstagramScraper/Model/Account.php +++ b/src/InstagramScraper/Model/Account.php @@ -208,6 +208,7 @@ protected function initPropertiesCustom($value, $prop, $array) { switch ($prop) { case 'id': + case 'pk': $this->id = $value; break; case 'username': From 7dcab88b0e995e2970141d9121a85bfc1247511a Mon Sep 17 00:00:00 2001 From: Pasha Verdi Date: Tue, 27 Mar 2018 12:34:00 +0300 Subject: [PATCH 12/24] fix thumbnails --- src/InstagramScraper/Model/Media.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 8b4bdd07..4ce8d2b7 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -409,17 +409,17 @@ protected function initPropertiesCustom($value, $prop, $arr) case 'likes': $this->likesCount = $arr[$prop]['count']; break; - case 'thumbnail_resources': + case 'display_resources': foreach ($value as $thumbnail) { $thumbnailsUrl[] = $thumbnail['src']; switch ($thumbnail['config_width']) { - case 150: + case 640: $this->imageThumbnailUrl = $thumbnail['src']; break; - case 320: + case 750: $this->imageLowResolutionUrl = $thumbnail['src']; break; - case 640: + case 1080: $this->imageStandardResolutionUrl = $thumbnail['src']; break; default: From b7a93105805bbc8b477f9eb88079fa02e237da7d Mon Sep 17 00:00:00 2001 From: Vitor Marcelino Date: Thu, 29 Mar 2018 09:42:22 -0300 Subject: [PATCH 13/24] Fix the paginate in getMediaCommentsByCode(), with end_cursor --- src/InstagramScraper/Instagram.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 44ecbb1f..8605750a 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -516,7 +516,7 @@ public function getMediaCommentsByCode($code, $count = 10, $maxId = null) if (sizeof($nodes) == 0) { return $comments; } - $maxId = $nodes[sizeof($nodes) - 1]['node']['id']; + $maxId = $jsonResponse['data']['shortcode_media']['edge_media_to_comment']['page_info']['end_cursor']; } return $comments; } From 927ba401121a6eb284b1f3538ce9fbf3fbe016eb Mon Sep 17 00:00:00 2001 From: Abyr Valg Date: Tue, 3 Apr 2018 13:28:44 +0300 Subject: [PATCH 14/24] Media: Load all info from comments edge --- src/InstagramScraper/Model/Media.php | 54 +++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 8b4bdd07..247c6b28 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -135,6 +135,21 @@ class Media extends AbstractModel */ protected $commentsCount = 0; + /** + * @var Comment[] + */ + protected $comments = []; + + /** + * @var bool + */ + protected $hasMoreComments = false; + + /** + * @var string + */ + protected $commentsNextPage = ''; + /** * @var Media[]|array */ @@ -372,6 +387,30 @@ public function getCommentsCount() return $this->commentsCount; } + /** + * @return Comment[] + */ + public function getComments() + { + return $this->comments; + } + + /** + * @return bool + */ + public function hasMoreComments() + { + return $this->hasMoreComments; + } + + /** + * @return string + */ + public function getCommentsNextPage() + { + return $this->commentsNextPage; + } + /** * @return Media[]|array */ @@ -498,7 +537,20 @@ protected function initPropertiesCustom($value, $prop, $arr) $this->link = Endpoints::getMediaPageLink($this->shortCode); break; case 'edge_media_to_comment': - $this->commentsCount = $arr[$prop]['count']; + if (isset($arr[$prop]['count'])) { + $this->commentsCount = (int) $arr[$prop]['count']; + } + if (isset($arr[$prop]['edges']) && is_array($arr[$prop]['edges'])) { + foreach ($arr[$prop]['edges'] as $commentData) { + $this->comments[] = Comment::create($commentData['node']); + } + } + if (isset($arr[$prop]['page_info']['has_next_page'])) { + $this->hasMoreComments = (bool) $arr[$prop]['page_info']['has_next_page']; + } + if (isset($arr[$prop]['page_info']['end_cursor'])) { + $this->commentsNextPage = (string) $arr[$prop]['page_info']['end_cursor']; + } break; case 'edge_media_preview_like': $this->likesCount = $arr[$prop]['count']; From 1ba2b19dc1cfac1ef613bd1cb55685f2f5304143 Mon Sep 17 00:00:00 2001 From: Maxim Date: Tue, 10 Apr 2018 14:40:03 +0700 Subject: [PATCH 15/24] Update Instagram.php add new method getMediasFromFeed --- src/InstagramScraper/Instagram.php | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 8605750a..c9eb487e 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -338,7 +338,48 @@ public function getMediasByUserId($id, $count = 20, $maxId = '') } return $medias; } + + /** + * @param string $username + * @param int $count + * + * @return Media[] + * @throws InstagramException + */ + public function getMediasFromFeed($username, $count = 20) + { + $medias = []; + $index = 0; + $response = Request::get(Endpoints::getAccountJsonLink($username), $this->generateHeaders($this->userSession)); + if (static::HTTP_NOT_FOUND === $response->code) { + throw new InstagramNotFoundException('Account with given username does not exist.'); + } + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); + if (!isset($userArray['graphql']['user'])) { + throw new InstagramNotFoundException('Account with this username does not exist', 404); + } + + $nodes = $userArray['graphql']['user']['edge_owner_to_timeline_media']['edges']; + + if (!isset($nodes) || empty($nodes)) { + return []; + } + + foreach ($nodes as $mediaArray) { + if ($index === $count) { + return $medias; + } + $medias[] = Media::create($mediaArray['node']); + $index++; + } + + return $medias; + } + /** * @param $mediaId * From 63dadca79c461ad5a9be6cac4fe72a2f29d74da2 Mon Sep 17 00:00:00 2001 From: Rhuan Carlos Date: Tue, 10 Apr 2018 08:56:52 -0300 Subject: [PATCH 16/24] SQUARE images, not resized proportional --- src/InstagramScraper/Model/Media.php | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 6edc7886..b5ea34c1 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -63,7 +63,7 @@ class Media extends AbstractModel /** * @var array */ - protected $squareThumbnailsUrl = []; + protected $squareImages = []; /** * @var array @@ -277,9 +277,9 @@ public function getImageHighResolutionUrl() /** * @return array */ - public function getSquareThumbnailsUrl() + public function getSquareImages() { - return $this->squareThumbnailsUrl; + return $this->squareImages; } @@ -449,23 +449,26 @@ protected function initPropertiesCustom($value, $prop, $arr) $this->likesCount = $arr[$prop]['count']; break; case 'display_resources': - foreach ($value as $thumbnail) { - $thumbnailsUrl[] = $thumbnail['src']; - switch ($thumbnail['config_width']) { + foreach ($value as $media) { + $mediasUrl[] = $media['src']; + switch ($media['config_width']) { case 640: - $this->imageThumbnailUrl = $thumbnail['src']; + $this->imageThumbnailUrl = $media['src']; break; case 750: - $this->imageLowResolutionUrl = $thumbnail['src']; + $this->imageLowResolutionUrl = $media['src']; break; case 1080: - $this->imageStandardResolutionUrl = $thumbnail['src']; + $this->imageStandardResolutionUrl = $media['src']; break; - default: - ; } } - $this->squareThumbnailsUrl = $thumbnailsUrl; + break; + case 'thumbnail_resources': + foreach ($value as $thumbnail) { + $thumbnailsUrl[] = $thumbnail['src']; + } + $this->squareImages = $thumbnailsUrl; break; case 'display_url': $this->imageHighResolutionUrl = $value; From e92441ed91a20c220354bf4b0fa507e98382f584 Mon Sep 17 00:00:00 2001 From: Rhuan Carlos Date: Tue, 10 Apr 2018 09:00:26 -0300 Subject: [PATCH 17/24] renamed --- src/InstagramScraper/Model/Media.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index b5ea34c1..74f18a5b 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -465,10 +465,11 @@ protected function initPropertiesCustom($value, $prop, $arr) } break; case 'thumbnail_resources': - foreach ($value as $thumbnail) { - $thumbnailsUrl[] = $thumbnail['src']; + $squareImagesUrl = []; + foreach ($value as $squareImage) { + $squareImagesUrl[] = $squareImage['src']; } - $this->squareImages = $thumbnailsUrl; + $this->squareImages = $squareImagesUrl; break; case 'display_url': $this->imageHighResolutionUrl = $value; From 21dd8e05d0c50eb3f07b477c1ae9f37dfd525314 Mon Sep 17 00:00:00 2001 From: yazhog Date: Thu, 12 Apr 2018 10:33:55 +0300 Subject: [PATCH 18/24] fix getAccount --- src/InstagramScraper/Instagram.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 8605750a..8db24626 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -281,7 +281,7 @@ public function getMedias($username, $count = 20, $maxId = '') */ public function getAccount($username) { - $response = Request::get(Endpoints::getAccountJsonLink($username), $this->generateHeaders($this->userSession)); + $response = Request::get(Endpoints::getAccountPageLink($username), $this->generateHeaders($this->userSession)); if (static::HTTP_NOT_FOUND === $response->code) { throw new InstagramNotFoundException('Account with given username does not exist.'); } @@ -289,11 +289,13 @@ public function getAccount($username) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } - $userArray = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); - if (!isset($userArray['graphql']['user'])) { + preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out); + $userArray = json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING); + + if (!isset($userArray['entry_data']['ProfilePage'][0]['graphql']['user'])) { throw new InstagramNotFoundException('Account with this username does not exist', 404); } - return Account::create($userArray['graphql']['user']); + return Account::create($userArray['entry_data']['ProfilePage'][0]['graphql']['user']); } /** From c48b37b79d659610b966f2b015b9bccef3aee72b Mon Sep 17 00:00:00 2001 From: Vitaly Date: Wed, 18 Apr 2018 16:34:00 +0300 Subject: [PATCH 19/24] Fix for new query_hash + gis signed header requirement. --- src/InstagramScraper/Endpoints.php | 13 ++++--- src/InstagramScraper/Instagram.php | 56 ++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 95b49a87..967b66a9 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -8,7 +8,7 @@ class Endpoints const LOGIN_URL = 'https://www.instagram.com/accounts/login/ajax/'; const ACCOUNT_PAGE = 'https://www.instagram.com/{username}'; const MEDIA_LINK = 'https://www.instagram.com/p/{code}'; - const ACCOUNT_MEDIAS = 'https://instagram.com/graphql/query/?query_id=17888483320059182&id={user_id}&first={count}&after={max_id}'; + const ACCOUNT_MEDIAS = 'https://www.instagram.com/graphql/query/?query_hash=42323d64886122307be10013ad2dcc44&variables={variables}'; const ACCOUNT_JSON_INFO = 'https://www.instagram.com/{username}/?__a=1'; const MEDIA_JSON_INFO = 'https://www.instagram.com/p/{code}/?__a=1'; const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}'; @@ -44,6 +44,11 @@ public static function setAccountMediasRequestCount($count) static::$requestMediaCount = $count; } + public static function getAccountMediasRequestCount() + { + return static::$requestMediaCount; + } + public static function getAccountPageLink($username) { return str_replace('{username}', urlencode($username), static::ACCOUNT_PAGE); @@ -59,11 +64,9 @@ public static function getAccountJsonInfoLinkByAccountId($id) return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_INFO_BY_ID); } - public static function getAccountMediasJsonLink($userId, $maxId = '') + public static function getAccountMediasJsonLink($variables) { - $url = str_replace('{user_id}', urlencode($userId), static::ACCOUNT_MEDIAS); - $url = str_replace('{count}', static::$requestMediaCount, $url); - return str_replace('{max_id}', urlencode($maxId), $url); + return str_replace('{variables}', urlencode($variables), static::ACCOUNT_MEDIAS); } public static function getMediaPageLink($code) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 08c1b59a..05ce84dc 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -33,7 +33,8 @@ class Instagram private $sessionUsername; private $sessionPassword; private $userSession; - private $userAgent = null; + private $rhxGis = null; + private $userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.106 Safari/537.36'; /** * @param string $username @@ -203,10 +204,11 @@ public function searchAccountsByUsername($username) /** * @param $session + * @param $gisToken * * @return array */ - private function generateHeaders($session) + private function generateHeaders($session, $gisToken = null) { $headers = []; if ($session) { @@ -223,6 +225,10 @@ private function generateHeaders($session) if ($this->getUserAgent()) { $headers['user-agent'] = $this->getUserAgent(); + + if (!is_null($gisToken)) { + $headers['x-instagram-gis'] = $gisToken; + } } return $headers; @@ -298,6 +304,35 @@ public function getAccount($username) return Account::create($userArray['entry_data']['ProfilePage'][0]['graphql']['user']); } + private function getSharedDataFromPage($url = Endpoints::BASE_URL) + { + $response = Request::get(rtrim($url, '/') . '/', $this->generateHeaders($this->userSession)); + if (static::HTTP_NOT_FOUND === $response->code) { + throw new InstagramNotFoundException('Account with given username does not exist.'); + } + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + + preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out); + return json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING); + } + + private function getRhxGis() + { + if ($this->rhxGis === null) { + $sharedData = $this->getSharedDataFromPage(); + $this->rhxGis = $sharedData['rhx_gis']; + } + + return $this->rhxGis; + } + + private function generateGisToken($variables) + { + return md5(implode(':', [$this->getRhxGis(), $variables ])); + } + /** * @param int $id * @param int $count @@ -306,13 +341,22 @@ public function getAccount($username) * @return Media[] * @throws InstagramException */ - public function getMediasByUserId($id, $count = 20, $maxId = '') + public function getMediasByUserId($id, $count = 12, $maxId = '') { $index = 0; $medias = []; $isMoreAvailable = true; while ($index < $count && $isMoreAvailable) { - $response = Request::get(Endpoints::getAccountMediasJsonLink($id, $maxId), $this->generateHeaders($this->userSession)); + $variables = json_encode([ + 'id' => (string) $id, + 'first' => (string) $count, + 'after' => (string) $maxId + ]); + + var_dump($this->getRhxGis(), $variables, $this->generateGisToken($variables)); + + $response = Request::get(Endpoints::getAccountMediasJsonLink($variables), $this->generateHeaders($this->userSession, $this->generateGisToken($variables))); + if (static::HTTP_OK !== $response->code) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } @@ -340,13 +384,13 @@ public function getMediasByUserId($id, $count = 20, $maxId = '') } return $medias; } - + /** * @param string $username * @param int $count - * * @return Media[] * @throws InstagramException + * @throws InstagramNotFoundException */ public function getMediasFromFeed($username, $count = 20) { From 549a64f439a2e21302d3c867a9fbdc86a2125930 Mon Sep 17 00:00:00 2001 From: Vitaly Date: Wed, 18 Apr 2018 16:44:42 +0300 Subject: [PATCH 20/24] delete dump --- src/InstagramScraper/Instagram.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 05ce84dc..5a6baa9f 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -353,8 +353,6 @@ public function getMediasByUserId($id, $count = 12, $maxId = '') 'after' => (string) $maxId ]); - var_dump($this->getRhxGis(), $variables, $this->generateGisToken($variables)); - $response = Request::get(Endpoints::getAccountMediasJsonLink($variables), $this->generateHeaders($this->userSession, $this->generateGisToken($variables))); if (static::HTTP_OK !== $response->code) { From 6e933d04ba2329af7e26a87ce432423fca28901b Mon Sep 17 00:00:00 2001 From: Vitaly Date: Thu, 19 Apr 2018 12:32:44 +0300 Subject: [PATCH 21/24] Optimize getUsernameById method (without authenticate) --- examples/getAccountById.php | 6 ++--- src/InstagramScraper/Endpoints.php | 6 +++++ src/InstagramScraper/Instagram.php | 37 ++++++++++-------------------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/examples/getAccountById.php b/examples/getAccountById.php index ca8fd1d2..a237c152 100644 --- a/examples/getAccountById.php +++ b/examples/getAccountById.php @@ -1,9 +1,7 @@ login(); -$account = $instagram->getAccountById('3'); +$account = (new \InstagramScraper\Instagram())->getAccountById('3'); // Available fields echo "Account info:\n"; @@ -17,4 +15,4 @@ echo "Number of followers: {$account->getFollowedByCount()}\n"; echo "Number of follows: {$account->getFollowsCount()}\n"; echo "Is private: {$account->isPrivate()}\n"; -echo "Is verified: {$account->isVerified()}\n"; +echo "Is verified: {$account->isVerified()}\n"; \ No newline at end of file diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 967b66a9..57ab64f0 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -26,6 +26,7 @@ class Endpoints const USER_FEED2 = 'https://www.instagram.com/?__a=1'; const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/'; const INSTAGRAM_CDN_URL = 'https://scontent.cdninstagram.com/'; + const ACCOUNT_JSON_PRIVATE_INFO_BY_ID = 'https://i.instagram.com/api/v1/users/{userId}/info/'; const ACCOUNT_MEDIAS2 = 'https://www.instagram.com/graphql/query/?query_id=17880160963012870&id={{accountId}}&first=10&after='; @@ -64,6 +65,11 @@ public static function getAccountJsonInfoLinkByAccountId($id) return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_INFO_BY_ID); } + public static function getAccountJsonPrivateInfoLinkByAccountId($id) + { + return str_replace('{userId}', urlencode($id), static::ACCOUNT_JSON_PRIVATE_INFO_BY_ID); + } + public static function getAccountMediasJsonLink($variables) { return str_replace('{variables}', urlencode($variables), static::ACCOUNT_MEDIAS); diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 5a6baa9f..8ef539e9 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -714,44 +714,31 @@ public function getAccountById($id) /** * @param string $id - * * @return string * @throws InstagramException - * @throws \InvalidArgumentException + * @throws InstagramNotFoundException */ public function getUsernameById($id) { - // Use the follow page to get the account. The follow url will redirect to the home page for the user, - // which has the username embedded in the url. + $response = Request::get(Endpoints::getAccountJsonPrivateInfoLinkByAccountId($id), $this->generateHeaders($this->userSession)); - if (!is_numeric($id)) { - throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); + if (static::HTTP_NOT_FOUND === $response->code) { + throw new InstagramNotFoundException('Account with given username does not exist.'); } - $url = Endpoints::getFollowUrl($id); - - // Cut a request by disabling redirects. - Request::curlOpt(CURLOPT_FOLLOWLOCATION, FALSE); - $response = Request::get($url, $this->generateHeaders($this->userSession)); - Request::curlOpt(CURLOPT_FOLLOWLOCATION, TRUE); - - if ($response->code === 400) { - throw new InstagramException('Account with this id does not exist.'); + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } - if ($response->code !== 302) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->raw_body) . ' Something went wrong. Please report issue.'); + if (!($responseArray = json_decode($response->raw_body, true))) { + throw new InstagramException('Response does not JSON'); } - $cookies = static::parseCookies($response->headers['Set-Cookie']); - $this->userSession['csrftoken'] = $cookies['csrftoken']; - - // Get the username from the response url. - $responseUrl = $response->headers['Location']; - $urlParts = explode('/', rtrim($responseUrl, '/')); - $username = end($urlParts); + if ($responseArray['status'] !== 'ok') { + throw new InstagramException((isset($responseArray['message']) ? $responseArray['message'] : 'Unknown Error')); + } - return $username; + return $responseArray['user']['username']; } /** From b040fab1c0dcfb5453c3ef7d542bdc9b1834adad Mon Sep 17 00:00:00 2001 From: Vitaly Date: Thu, 19 Apr 2018 12:35:21 +0300 Subject: [PATCH 22/24] getPaginateMedias with x-instagram-gis required header --- examples/getPaginateMediasByUsername.php | 31 ++++++++++++++++++++++++ src/InstagramScraper/Instagram.php | 13 ++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 examples/getPaginateMediasByUsername.php diff --git a/examples/getPaginateMediasByUsername.php b/examples/getPaginateMediasByUsername.php new file mode 100644 index 00000000..d631d093 --- /dev/null +++ b/examples/getPaginateMediasByUsername.php @@ -0,0 +1,31 @@ +getPaginateMedias('kevin'); + +foreach ($response['medias'] as $media) { + /** @var \InstagramScraper\Model\Media $media */ + + echo "Media info:" . PHP_EOL; + echo "Id: {$media->getId()}" . PHP_EOL; + echo "Shotrcode: {$media->getShortCode()}" . PHP_EOL; + echo "Created at: {$media->getCreatedTime()}" . PHP_EOL; + echo "Caption: {$media->getCaption()}" . PHP_EOL; + echo "Number of comments: {$media->getCommentsCount()}" . PHP_EOL; + echo "Number of likes: {$media->getLikesCount()}" . PHP_EOL; + echo "Get link: {$media->getLink()}" . PHP_EOL; + echo "High resolution image: {$media->getImageHighResolutionUrl()}" . PHP_EOL; + echo "Media type (video or image): {$media->getType()}" . PHP_EOL . PHP_EOL; + $account = $media->getOwner(); + + echo "Account info:" . PHP_EOL; + echo "Id: {$account->getId()}" . PHP_EOL; + echo "Username: {$account->getUsername()}" . PHP_EOL; + echo "Full name: {$account->getFullName()}" . PHP_EOL; + echo "Profile pic url: {$account->getProfilePicUrl()}" . PHP_EOL; + echo PHP_EOL . PHP_EOL; +} + +echo "HasNextPage: {$response['hasNextPage']}" . PHP_EOL; +echo "MaxId: {$response['maxId']}" . PHP_EOL; \ No newline at end of file diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 8ef539e9..06294910 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -486,6 +486,7 @@ public function getMediaByCode($mediaCode) * * @return array * @throws InstagramException + * @throws InstagramNotFoundException */ public function getPaginateMedias($username, $maxId = '') { @@ -499,8 +500,16 @@ public function getPaginateMedias($username, $maxId = '') 'hasNextPage' => $hasNextPage, ]; - $response = Request::get(Endpoints::getAccountMediasJsonLink($account->getId(), $maxId), - $this->generateHeaders($this->userSession)); + $variables = json_encode([ + 'id' => (string) $account->getId(), + 'first' => (string) Endpoints::getAccountMediasRequestCount(), + 'after' => (string) $maxId + ]); + + $response = Request::get( + Endpoints::getAccountMediasJsonLink($variables), + $this->generateHeaders($this->userSession, $this->generateGisToken($variables)) + ); // use a raw constant in the code is not a good idea!! //if ($response->code !== 200) { From 237ea0b803f960f1ab033f51c371061c18c59771 Mon Sep 17 00:00:00 2001 From: Vitaly Date: Thu, 19 Apr 2018 12:36:59 +0300 Subject: [PATCH 23/24] small refactoring --- src/InstagramScraper/Instagram.php | 97 +++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 27 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 06294910..5107de0e 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -13,6 +13,8 @@ use InstagramScraper\Model\Story; use InstagramScraper\Model\Tag; use InstagramScraper\Model\UserStories; +use InvalidArgumentException; +use phpFastCache\Cache\ExtendedCacheItemPoolInterface; use phpFastCache\CacheManager; use Unirest\Request; @@ -26,7 +28,9 @@ class Instagram const PAGING_DELAY_MINIMUM_MICROSEC = 1000000; // 1 sec min delay to simulate browser const PAGING_DELAY_MAXIMUM_MICROSEC = 3000000; // 3 sec max delay to simulate browser + /** @var ExtendedCacheItemPoolInterface $instanceCache */ private static $instanceCache; + public $pagingTimeLimitSec = self::PAGING_TIME_LIMIT_SEC; public $pagingDelayMinimumMicrosec = self::PAGING_DELAY_MINIMUM_MICROSEC; public $pagingDelayMaximumMicrosec = self::PAGING_DELAY_MAXIMUM_MICROSEC; @@ -42,6 +46,7 @@ class Instagram * @param null $sessionFolder * * @return Instagram + * @throws \phpFastCache\Exceptions\phpFastCacheDriverCheckException */ public static function withCredentials($username, $password, $sessionFolder = null) { @@ -254,11 +259,9 @@ public function setUserAgent($userAgent) } /** - * @param $userAgent - * * @return null */ - public function resetUserAgent($userAgent) + public function resetUserAgent() { return $this->userAgent = null; } @@ -270,6 +273,7 @@ public function resetUserAgent($userAgent) * * @return Media[] * @throws InstagramException + * @throws InstagramNotFoundException */ public function getMedias($username, $count = 20, $maxId = '') { @@ -295,39 +299,37 @@ public function getAccount($username) throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } - preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out); - $userArray = json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING); - + $userArray = self::extractSharedDataFromBody($response->raw_body); + if (!isset($userArray['entry_data']['ProfilePage'][0]['graphql']['user'])) { throw new InstagramNotFoundException('Account with this username does not exist', 404); } return Account::create($userArray['entry_data']['ProfilePage'][0]['graphql']['user']); } - private function getSharedDataFromPage($url = Endpoints::BASE_URL) - { - $response = Request::get(rtrim($url, '/') . '/', $this->generateHeaders($this->userSession)); - if (static::HTTP_NOT_FOUND === $response->code) { - throw new InstagramNotFoundException('Account with given username does not exist.'); - } - if (static::HTTP_OK !== $response->code) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); - } - - preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $response->raw_body, $out); - return json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING); - } - + /** + * @return null + * @throws InstagramException + */ private function getRhxGis() { if ($this->rhxGis === null) { - $sharedData = $this->getSharedDataFromPage(); - $this->rhxGis = $sharedData['rhx_gis']; + try { + $sharedData = $this->getSharedDataFromPage(); + $this->rhxGis = $sharedData['rhx_gis']; + } catch (\Exception $exception) { + throw new InstagramException('Could not extract gis from page'); + } } return $this->rhxGis; } + /** + * @param $variables + * @return string + * @throws InstagramException + */ private function generateGisToken($variables) { return md5(implode(':', [$this->getRhxGis(), $variables ])); @@ -358,7 +360,9 @@ public function getMediasByUserId($id, $count = 12, $maxId = '') if (static::HTTP_OK !== $response->code) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } + $arr = json_decode($response->raw_body, true, 512, JSON_BIGINT_AS_STRING); + if (!is_array($arr)) { throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); } @@ -422,12 +426,14 @@ public function getMediasFromFeed($username, $count = 20) } return $medias; - } - + } + /** * @param $mediaId * * @return Media + * @throws InstagramException + * @throws InstagramNotFoundException */ public function getMediaById($mediaId) { @@ -445,7 +451,7 @@ public function getMediaById($mediaId) public function getMediaByUrl($mediaUrl) { if (filter_var($mediaUrl, FILTER_VALIDATE_URL) === false) { - throw new \InvalidArgumentException('Malformed media url'); + throw new InvalidArgumentException('Malformed media url'); } $response = Request::get(rtrim($mediaUrl, '/') . '/?__a=1', $this->generateHeaders($this->userSession)); // use a raw constant in the code is not a good idea!! @@ -548,11 +554,12 @@ public function getPaginateMedias($username, $maxId = '') } /** - * @param $mediaId + * @param $mediaId * @param int $count * @param null $maxId * * @return Comment[] + * @throws InstagramException */ public function getMediaCommentsById($mediaId, $count = 10, $maxId = null) { @@ -713,7 +720,8 @@ public function getMediaLikesByCode($code, $count = 10, $maxId = null) * * @return Account * @throws InstagramException - * @throws \InvalidArgumentException + * @throws InvalidArgumentException + * @throws InstagramNotFoundException */ public function getAccountById($id) { @@ -1258,6 +1266,12 @@ public function isLoggedIn($session) return true; } + /** + * @param $response + * @param $cookies + * @return \Unirest\Response + * @throws InstagramAuthException + */ private function verifyTwoStep($response, $cookies) { $new_cookies = static::parseCookies($response->headers['Set-Cookie']); @@ -1347,4 +1361,33 @@ public function saveSession() $cachedString = static::$instanceCache->getItem($this->sessionUsername); $cachedString->set($this->userSession); } + + private static function extractSharedDataFromBody($body) + { + if (preg_match_all('#\_sharedData \= (.*?)\;\<\/script\>#', $body, $out)) { + return json_decode($out[1][0], true, 512, JSON_BIGINT_AS_STRING); + } + + return null; + } + + /** + * @param string $url + * @return mixed|null + * @throws InstagramException + * @throws InstagramNotFoundException + */ + private function getSharedDataFromPage($url = Endpoints::BASE_URL) + { + $response = Request::get(rtrim($url, '/') . '/', $this->generateHeaders($this->userSession)); + if (static::HTTP_NOT_FOUND === $response->code) { + throw new InstagramNotFoundException("Page {$url} not found"); + } + + if (static::HTTP_OK !== $response->code) { + throw new InstagramException('Response code is ' . $response->code . '. Body: ' . static::getErrorBody($response->body) . ' Something went wrong. Please report issue.'); + } + + return self::extractSharedDataFromBody($response->raw_body); + } } From fe91cc8be5314b1cc1e9f56aaea5377a87cfac16 Mon Sep 17 00:00:00 2001 From: Christophe Histaesse Date: Mon, 23 Apr 2018 17:10:44 +0200 Subject: [PATCH 24/24] Update Media.php Add imageThumbnailUrl from GraphQL endpoint ACCOUNT_MEDIAS --- src/InstagramScraper/Model/Media.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 6edc7886..d2a9479c 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -476,6 +476,9 @@ protected function initPropertiesCustom($value, $prop, $arr) $this->type = static::TYPE_IMAGE; } break; + case 'thumbnail_src': + $this->imageThumbnailUrl = $value; + break; case 'carousel_media': $this->type = self::TYPE_CAROUSEL; $this->carouselMedia = [];