From 5fa650df15a65d53eba206e698e12a3e1fe0b0c4 Mon Sep 17 00:00:00 2001 From: raiym Date: Wed, 30 Nov 2016 18:24:31 +0300 Subject: [PATCH 01/14] Quick fix of 405 error. Credits to @Bolandish --- index.php | 37 +++++++++++- src/InstagramScraper/Endpoints.php | 9 +-- src/InstagramScraper/Instagram.php | 90 +++++++++++++++++++++--------- 3 files changed, 104 insertions(+), 32 deletions(-) diff --git a/index.php b/index.php index 2a6424f9..72b3ca90 100644 --- a/index.php +++ b/index.php @@ -8,13 +8,46 @@ $instagram = new Instagram(); try { - $medias = Instagram::getMedias('kevin', 1000); - echo $medias[998]->imageThumbnailUrl; +// $medias = Instagram::getMedias('kevin', 1497); + $parameters = 'ig_user(3){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}'; +// echo json_encode($medias[1497]); + $account = json_decode(getContentsFromUrl($parameters), ($assoc || $assoc == "array")); + print_r($account); } catch (\Exception $ex) { print_r($ex); } +function getContentsFromUrl($parameters) { + if (!function_exists('curl_init')) { + return false; + } + $random = generateRandomString(); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, "https://www.instagram.com/query/"); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); + curl_setopt($ch, CURLOPT_POST, 1); + curl_setopt($ch, CURLOPT_POSTFIELDS, 'q='.$parameters); + $headers = array(); + $headers[] = "Cookie: csrftoken=$random;"; + $headers[] = "X-Csrftoken: $random"; + $headers[] = "Referer: https://www.instagram.com/"; + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $output = curl_exec($ch); + curl_close($ch); + return $output; +} + +function generateRandomString($length = 10) { + $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; + $charactersLength = strlen($characters); + $randomString = ''; + for ($i = 0; $i < $length; $i++) { + $randomString .= $characters[rand(0, $charactersLength - 1)]; + } + return $randomString; +} + //echo Media::getIdFromCode('z-arAqi4DP') . '
'; //echo Media::getCodeFromId('936303077400215759_123123'); //echo Media::getLinkFromId('936303077400215759_123123'); diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index a6e90900..6cbc3ed0 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -13,11 +13,12 @@ class Endpoints const MEDIA_JSON_BY_LOCATION_ID = 'https://www.instagram.com/explore/locations/{{facebookLocationId}}/?__a=1&max_id={{maxId}}'; const MEDIA_JSON_BY_TAG = 'https://www.instagram.com/explore/tags/{tag}/?__a=1&max_id={max_id}'; const GENERAL_SEARCH = 'https://www.instagram.com/web/search/topsearch/?query={query}'; - const ACCOUNT_JSON_INFO_BY_ID = 'https://www.instagram.com/query/?q=ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}'; - const LAST_COMMENTS_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; - const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; - const LAST_LIKES_BY_CODE = 'https://www.instagram.com/query/?q=ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; + const ACCOUNT_JSON_INFO_BY_ID = 'ig_user({userId}){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}'; + const LAST_COMMENTS_BY_CODE = 'ig_shortcode({{code}}){comments.last({{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; + const COMMENTS_BEFORE_COMMENT_ID_BY_CODE = 'ig_shortcode({{code}}){comments.before({{commentId}},{{count}}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; + const LAST_LIKES_BY_CODE = 'ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; + const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/'; public static function getAccountPageLink($username) { diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 15b5894e..422bb4e0 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -34,18 +34,21 @@ public static function getAccount($username) public static function getAccountById($id) { - if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } - $response = Request::get(Endpoints::getAccountJsonInfoLinkByAccountId($id)); - if ($response->code === 404) { - throw new InstagramNotFoundException('Account with given username does not exist.'); - } - if ($response->code !== 200) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); - } - $userArray = json_decode($response->raw_body, true); + + $parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id); +// self::getContentsFromUrl($parameters); + +// $response = Request::get(Endpoints::getAccountJsonInfoLinkByAccountId($id)); +// if ($response->code === 404) { +// throw new InstagramNotFoundException('Account with given username does not exist.'); +// } +// if ($response->code !== 200) { +// throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); +// } + $userArray = json_decode(self::getContentsFromUrl($parameters), true); if ($userArray['status'] === 'fail') { throw new InstagramException($userArray['message']); } @@ -55,6 +58,38 @@ public static function getAccountById($id) return Account::fromAccountPage($userArray); } + private function getContentsFromUrl($parameters) + { + if (!function_exists('curl_init')) { + return false; + } + $random = self::generateRandomString(); + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, Endpoints::INSTAGRAM_QUERY_URL); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); + curl_setopt($ch, CURLOPT_POST, 1); + curl_setopt($ch, CURLOPT_POSTFIELDS, 'q=' . $parameters); + $headers = array(); + $headers[] = "Cookie: csrftoken=$random;"; + $headers[] = "X-Csrftoken: $random"; + $headers[] = "Referer: https://www.instagram.com/"; + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $output = curl_exec($ch); + curl_close($ch); + return $output; + } + + private function generateRandomString($length = 10) + { + $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; + $charactersLength = strlen($characters); + $randomString = ''; + for ($i = 0; $i < $length; $i++) { + $randomString .= $characters[rand(0, $charactersLength - 1)]; + } + return $randomString; + } + public static function getMedias($username, $count = 20, $maxId = '') { $index = 0; @@ -95,8 +130,8 @@ public static function getPaginateMedias($username, $maxId = '') $medias = []; $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, + 'medias' => $medias, + 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; @@ -124,8 +159,8 @@ public static function getPaginateMedias($username, $maxId = '') $hasNextPage = $arr['more_available']; $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, + 'medias' => $medias, + 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; @@ -197,8 +232,8 @@ public static function getPaginateMediasByTag($tag, $maxId = '') $medias = []; $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, + 'medias' => $medias, + 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; @@ -232,8 +267,8 @@ public static function getPaginateMediasByTag($tag, $maxId = '') $hasNextPage = $arr['tag']['media']['page_info']['has_next_page']; $toReturn = [ - 'medias' => $medias, - 'maxId' => $maxId, + 'medias' => $medias, + 'maxId' => $maxId, 'hasNextPage' => $hasNextPage ]; @@ -336,17 +371,20 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null) $remain = 0; } if (!isset($maxId)) { - $response = Request::get(Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive)); +// $response = Request::get(Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive)); + $parameters = Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive); + } else { - $response = Request::get(Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId)); - } - if ($response->code === 404) { - throw new InstagramNotFoundException('Account with given username does not exist.'); - } - if ($response->code !== 200) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); +// $response = Request::get(Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId)); + $parameters = Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId); } - $jsonResponse = json_decode($response->raw_body, true); +// if ($response->code === 404) { +// throw new InstagramNotFoundException('Account with given username does not exist.'); +// } +// if ($response->code !== 200) { +// throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); +// } + $jsonResponse = json_decode(self::getContentsFromUrl($parameters), true); $nodes = $jsonResponse['comments']['nodes']; foreach ($nodes as $commentArray) { $comments[] = Comment::fromApi($commentArray); From bf93a00ee31bc252f0926ca214145dac442b5eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Krzych?= Date: Thu, 1 Dec 2016 10:06:19 +0100 Subject: [PATCH 02/14] Make methods getContentsFromUrl and generateRandomString static. --- src/InstagramScraper/Instagram.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 422bb4e0..eab49f0b 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -58,7 +58,7 @@ public static function getAccountById($id) return Account::fromAccountPage($userArray); } - private function getContentsFromUrl($parameters) + private static function getContentsFromUrl($parameters) { if (!function_exists('curl_init')) { return false; @@ -79,7 +79,7 @@ private function getContentsFromUrl($parameters) return $output; } - private function generateRandomString($length = 10) + private static function generateRandomString($length = 10) { $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; $charactersLength = strlen($characters); From 2b8d4e84f1f000c7fe6c278ddb3bbad541e47880 Mon Sep 17 00:00:00 2001 From: raiym Date: Fri, 2 Dec 2016 22:29:34 +0300 Subject: [PATCH 03/14] Fixes #47. Image URLS broken for some medias in getMediaByCode() --- index.php | 38 +++------------------------- src/InstagramScraper/Endpoints.php | 1 + src/InstagramScraper/Model/Media.php | 20 ++++----------- 3 files changed, 9 insertions(+), 50 deletions(-) diff --git a/index.php b/index.php index 72b3ca90..143d6144 100644 --- a/index.php +++ b/index.php @@ -3,51 +3,19 @@ require_once 'vendor/autoload.php'; require_once 'src/InstagramScraper.php'; -use InstagramScraper\Exception\InstagramException; + use InstagramScraper\Instagram; -$instagram = new Instagram(); try { // $medias = Instagram::getMedias('kevin', 1497); - $parameters = 'ig_user(3){id,username,external_url,full_name,profile_pic_url,biography,followed_by{count},follows{count},media{count},is_private,is_verified}'; // echo json_encode($medias[1497]); - $account = json_decode(getContentsFromUrl($parameters), ($assoc || $assoc == "array")); - print_r($account); + $media = Instagram::getMediaByCode('BL0k1EXhElI'); + echo json_encode($media); } catch (\Exception $ex) { print_r($ex); } -function getContentsFromUrl($parameters) { - if (!function_exists('curl_init')) { - return false; - } - $random = generateRandomString(); - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, "https://www.instagram.com/query/"); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($ch, CURLOPT_POST, 1); - curl_setopt($ch, CURLOPT_POSTFIELDS, 'q='.$parameters); - $headers = array(); - $headers[] = "Cookie: csrftoken=$random;"; - $headers[] = "X-Csrftoken: $random"; - $headers[] = "Referer: https://www.instagram.com/"; - curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); - $output = curl_exec($ch); - curl_close($ch); - return $output; -} - -function generateRandomString($length = 10) { - $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'; - $charactersLength = strlen($characters); - $randomString = ''; - for ($i = 0; $i < $length; $i++) { - $randomString .= $characters[rand(0, $charactersLength - 1)]; - } - return $randomString; -} - //echo Media::getIdFromCode('z-arAqi4DP') . '
'; //echo Media::getCodeFromId('936303077400215759_123123'); //echo Media::getLinkFromId('936303077400215759_123123'); diff --git a/src/InstagramScraper/Endpoints.php b/src/InstagramScraper/Endpoints.php index 6cbc3ed0..1347f360 100644 --- a/src/InstagramScraper/Endpoints.php +++ b/src/InstagramScraper/Endpoints.php @@ -19,6 +19,7 @@ class Endpoints const LAST_LIKES_BY_CODE = 'ig_shortcode({{code}}){likes{nodes{id,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}'; const INSTAGRAM_QUERY_URL = 'https://www.instagram.com/query/'; + const INSTAGRAM_CDN_URL = 'https://scontent.cdninstagram.com/'; public static function getAccountPageLink($username) { diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index de47cda7..e11ee225 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -113,23 +113,13 @@ public static function fromMediaPage($mediaArray) private static function getImageUrls($imageUrl) { - $imageUrl = self::getCleanImageUrl($imageUrl); $parts = explode('/', parse_url($imageUrl)['path']); - if (sizeof($parts) == 4) { - $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/s640x640/' . $parts[2] . '/' . $parts[3]; - } else { - if (isset($parts[4]) && $parts[4][0] == 'p') { - $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/p640x640/' . $parts[3] . '/' . $parts[4]; - } else { - $standard = 'https://scontent.cdninstagram.com/' . $parts[1] . '/s640x640/' . $parts[3] . '/' . $parts[4]; - } - } - + $imageName = $parts[sizeof($parts) - 1]; $urls = [ - 'standard' => $standard, - 'low' => str_replace('640x640', '320x320', $standard), - 'high' => str_replace('640x640', '1080x1080', $standard), - 'thumbnail' => str_replace('640x640', '150x150', $standard) + 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, + 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, + 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName, + 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName ]; return $urls; } From a5440bcce70591b9e09ef178e930532399b1d400 Mon Sep 17 00:00:00 2001 From: raiym Date: Fri, 2 Dec 2016 22:54:05 +0300 Subject: [PATCH 04/14] Enh. Get best image resolution possible in imageHighResolutionUrl in all methods --- src/InstagramScraper/Model/Media.php | 41 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index e11ee225..d99b10bc 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -43,10 +43,11 @@ public static function fromApi($mediaArray) $instance->link = $mediaArray['link']; $instance->commentsCount = $mediaArray['comments']['count']; $instance->likesCount = $mediaArray['likes']['count']; - $instance->imageLowResolutionUrl = self::getCleanImageUrl($mediaArray['images']['low_resolution']['url']); - $instance->imageThumbnailUrl = self::getCleanImageUrl($mediaArray['images']['thumbnail']['url']); - $instance->imageStandardResolutionUrl = self::getCleanImageUrl($mediaArray['images']['standard_resolution']['url']); - $instance->imageHighResolutionUrl = str_replace('320x320', '1080x1080', $instance->imageLowResolutionUrl); + $images = self::getImageUrls($mediaArray['images']['standard_resolution']['url']); + $instance->imageLowResolutionUrl = $images['low']; + $instance->imageThumbnailUrl = $images['thumbnail']; + $instance->imageStandardResolutionUrl = $images['standard']; + $instance->imageHighResolutionUrl = $images['high']; if (isset($mediaArray['caption'])) { $instance->caption = $mediaArray['caption']['text']; } @@ -67,9 +68,17 @@ public static function fromApi($mediaArray) return $instance; } - private static function getCleanImageUrl($imageUrl) + private static function getImageUrls($imageUrl) { - return strpos($imageUrl, '?ig_cache_key=') ? substr($imageUrl, 0, strpos($imageUrl, '?ig_cache_key=')) : $imageUrl; + $parts = explode('/', parse_url($imageUrl)['path']); + $imageName = $parts[sizeof($parts) - 1]; + $urls = [ + 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, + 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName, + 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, + 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName + ]; + return $urls; } public static function fromMediaPage($mediaArray) @@ -111,19 +120,6 @@ public static function fromMediaPage($mediaArray) return $instance; } - private static function getImageUrls($imageUrl) - { - $parts = explode('/', parse_url($imageUrl)['path']); - $imageName = $parts[sizeof($parts) - 1]; - $urls = [ - 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, - 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, - 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName, - 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName - ]; - return $urls; - } - public static function fromTagPage($mediaArray) { $instance = new self(); @@ -136,8 +132,11 @@ public static function fromTagPage($mediaArray) $instance->caption = $mediaArray['caption']; } $instance->createdTime = $mediaArray['date']; - $instance->imageThumbnailUrl = self::getCleanImageUrl($mediaArray['thumbnail_src']); - $instance->imageStandardResolutionUrl = self::getCleanImageUrl($mediaArray['display_src']); + $images = self::getImageUrls($mediaArray['display_src']); + $instance->imageStandardResolutionUrl = $images['standard']; + $instance->imageLowResolutionUrl = $images['low']; + $instance->imageHighResolutionUrl = $images['high']; + $instance->imageThumbnailUrl = $images['thumbnail']; $instance->type = 'image'; if ($mediaArray['is_video']) { $instance->type = 'video'; From aed755e2345cd2643cd3fda14597eda7c35a74f5 Mon Sep 17 00:00:00 2001 From: raiym Date: Sat, 3 Dec 2016 15:46:15 +0300 Subject: [PATCH 05/14] Delete getLikesByCode() method since in is not working anymore --- src/InstagramScraper/Instagram.php | 35 ---------------------------- src/InstagramScraper/Model/Media.php | 2 +- 2 files changed, 1 insertion(+), 36 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index eab49f0b..ce4cc4b6 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -37,17 +37,7 @@ public static function getAccountById($id) if (!is_numeric($id)) { throw new \InvalidArgumentException('User id must be integer or integer wrapped in string'); } - $parameters = Endpoints::getAccountJsonInfoLinkByAccountId($id); -// self::getContentsFromUrl($parameters); - -// $response = Request::get(Endpoints::getAccountJsonInfoLinkByAccountId($id)); -// if ($response->code === 404) { -// throw new InstagramNotFoundException('Account with given username does not exist.'); -// } -// if ($response->code !== 200) { -// throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); -// } $userArray = json_decode(self::getContentsFromUrl($parameters), true); if ($userArray['status'] === 'fail') { throw new InstagramException($userArray['message']); @@ -371,19 +361,11 @@ public static function getMediaCommentsByCode($code, $count = 10, $maxId = null) $remain = 0; } if (!isset($maxId)) { -// $response = Request::get(Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive)); $parameters = Endpoints::getLastCommentsByCodeLink($code, $numberOfCommentsToRetreive); } else { -// $response = Request::get(Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId)); $parameters = Endpoints::getCommentsBeforeCommentIdByCode($code, $numberOfCommentsToRetreive, $maxId); } -// if ($response->code === 404) { -// throw new InstagramNotFoundException('Account with given username does not exist.'); -// } -// if ($response->code !== 200) { -// throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); -// } $jsonResponse = json_decode(self::getContentsFromUrl($parameters), true); $nodes = $jsonResponse['comments']['nodes']; foreach ($nodes as $commentArray) { @@ -460,21 +442,4 @@ public static function getLocationById($facebookLocationId) $jsonResponse = json_decode($response->raw_body, true); return Location::makeLocation($jsonResponse['location']); } - - public static function getLastLikesByCode($code) - { - $response = Request::get(Endpoints::getLastLikesByCodeLink($code)); - if ($response->code === 404) { - throw new InstagramNotFoundException('Media with this shortcode doesn\'t exist'); - } - if ($response->code !== 200) { - throw new InstagramException('Response code is ' . $response->code . '. Body: ' . $response->body . ' Something went wrong. Please report issue.'); - } - $jsonResponse = json_decode($response->raw_body, true); - $users = []; - foreach ($jsonResponse['likes']['nodes'] as $userArray) { - $users[] = Account::fromAccountPage($userArray['user']); - } - return $users; - } } \ No newline at end of file diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index d99b10bc..5990df1b 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -73,8 +73,8 @@ private static function getImageUrls($imageUrl) $parts = explode('/', parse_url($imageUrl)['path']); $imageName = $parts[sizeof($parts) - 1]; $urls = [ - 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, 'thumbnail' => Endpoints::INSTAGRAM_CDN_URL . 't/s150x150/' . $imageName, + 'low' => Endpoints::INSTAGRAM_CDN_URL . 't/s320x320/' . $imageName, 'standard' => Endpoints::INSTAGRAM_CDN_URL . 't/s640x640/' . $imageName, 'high' => Endpoints::INSTAGRAM_CDN_URL . 't/' . $imageName ]; From 072a8799f201dd46a093350fd978e0cd1f28e7a5 Mon Sep 17 00:00:00 2001 From: raiym Date: Sun, 4 Dec 2016 11:11:20 +0300 Subject: [PATCH 06/14] Update README.md --- README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 89dc902b..505bf89d 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ echo $account->username; ### Search users by username ```php -$medias = Instagram::searchAccountsByUsername('durov'); +$users = Instagram::searchAccountsByUsername('durov'); echo '
';
-echo json_encode($medias);
+echo json_encode($users);
 echo '

'; ``` @@ -56,9 +56,20 @@ Available properties: $imageStandardResolutionUrl; $imageHighResolutionUrl; $caption; + $captionIsEdited; + $isAd; $videoLowResolutionUrl; $videoStandardResolutionUrl; $videoLowBandwidthUrl; + $videoViews; + $code; + $owner; + $ownerId; + $likesCount; + $locationId; + $locationName; + $commentsCount; + */ echo $medias[0]->imageHighResolutionUrl; echo $medias[0]->caption; From 9cf9efd290ebc11b62b6df478cd45381a26954c1 Mon Sep 17 00:00:00 2001 From: raiym Date: Mon, 12 Dec 2016 22:22:42 +0300 Subject: [PATCH 07/14] Update Changelog --- CHANGELOG | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index aab3e83c..23f53d0c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,38 @@ Instagram PHP Scraper Change Log ================================ +Version v0.4.5 +-------------- +- Enh: BIG thing. Get best resolutions for medias possible + +Version v0.4.4 +-------------- +- Bug: Image urls broken for some medias in getMediaByCode() + +Version v0.4.3 +-------------- +- Enh: Make methods getContentsFromUrl() and generateRandomString() static + +Version v0.4.2 +-------------- +- Bug: Fix 405 error with fetching accounts by id +- Bug: Fix 405 error with fetching comments by id + +Version v0.4.1 +-------------- +- Enh: Url encode endpoints to support for example Japan language + +Version v0.4.0 +-------------- +- Enh: New methods to paginate medias getPaginateMedias() and getPaginateMediasByTag() + +Version v0.3.5 +-------------- +- Enh: Convenient media pagination getMediasByTag() + +Version v0.3.4 +-------------- +- Bug: Fix media urls + Version v0.3.3 -------------- - Bug: Include class Location and Comment in InstagramScraper.php From e3042ad68b2e304f6b28436d8d26c2f31a06de4e Mon Sep 17 00:00:00 2001 From: raiym Date: Mon, 12 Dec 2016 23:40:38 +0300 Subject: [PATCH 08/14] Add tests: testGetIdFromCode(), testGetCodeFromId() --- tests/InstagramTest.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index 3e840bdc..fa13e404 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -3,6 +3,7 @@ require '../vendor/autoload.php'; use InstagramScraper\Instagram; +use InstagramScraper\Model\Media; use PHPUnit\Framework\TestCase; @@ -63,4 +64,20 @@ public function testGetLocationById() $location = Instagram::getLocationById(1); $this->assertEquals('Dog Patch Labs', $location->name); } + + public function testGetIdFromCode() + { + $code = Media::getCodeFromId('1270593720437182847'); + $this->assertEquals('BGiDkHAgBF_', $code); + $code = Media::getCodeFromId('1270593720437182847_3'); + $this->assertEquals('BGiDkHAgBF_', $code); + $code = Media::getCodeFromId(1270593720437182847); + $this->assertEquals('BGiDkHAgBF_', $code); + } + + public function testGetCodeFromId() + { + $id = Media::getIdFromCode('BGiDkHAgBF_'); + $this->assertEquals(1270593720437182847, $id); + } } \ No newline at end of file From d9353e9f6ca8771b21c3bd535aee03bae4add48f Mon Sep 17 00:00:00 2001 From: raiym Date: Mon, 12 Dec 2016 23:57:38 +0300 Subject: [PATCH 09/14] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 505bf89d..712aec70 100644 --- a/README.md +++ b/README.md @@ -174,4 +174,7 @@ $medias = Instagram::getLocationTopMediasById(1); ### Get location medias by location id ```php $medias = Instagram::getLocationMediasById(1); -``` \ No newline at end of file +``` + +### Other +Java library: https://github.com/postaddictme/instagram-java-scraper \ No newline at end of file From 92148c6d1b1ca2c2930035c219206552cf0bfdba Mon Sep 17 00:00:00 2001 From: raiym Date: Tue, 13 Dec 2016 00:10:59 +0300 Subject: [PATCH 10/14] Refactor getIdFromCode() method --- src/InstagramScraper/Model/Media.php | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 5990df1b..88980ae2 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -148,21 +148,13 @@ public static function fromTagPage($mediaArray) public static function getIdFromCode($code) { - $alphabet = [ - '-' => 62, '1' => 53, '0' => 52, '3' => 55, '2' => 54, '5' => 57, '4' => 56, '7' => 59, '6' => 58, '9' => 61, - '8' => 60, 'A' => 0, 'C' => 2, 'B' => 1, 'E' => 4, 'D' => 3, 'G' => 6, 'F' => 5, 'I' => 8, 'H' => 7, - 'K' => 10, 'J' => 9, 'M' => 12, 'L' => 11, 'O' => 14, 'N' => 13, 'Q' => 16, 'P' => 15, 'S' => 18, 'R' => 17, - 'U' => 20, 'T' => 19, 'W' => 22, 'V' => 21, 'Y' => 24, 'X' => 23, 'Z' => 25, '_' => 63, 'a' => 26, 'c' => 28, - 'b' => 27, 'e' => 30, 'd' => 29, 'g' => 32, 'f' => 31, 'i' => 34, 'h' => 33, 'k' => 36, 'j' => 35, 'm' => 38, - 'l' => 37, 'o' => 40, 'n' => 39, 'q' => 42, 'p' => 41, 's' => 44, 'r' => 43, 'u' => 46, 't' => 45, 'w' => 48, - 'v' => 47, 'y' => 50, 'x' => 49, 'z' => 51 - ]; - $n = 0; + $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'; + $id = 0; for ($i = 0; $i < strlen($code); $i++) { $c = $code[$i]; - $n = $n * 64 + $alphabet[$c]; + $id = $id * 64 + strpos($alphabet, $c); } - return $n; + return $id; } public static function getLinkFromId($id) From 32e9f5894ded519629a96cb6a145c4c6547dda84 Mon Sep 17 00:00:00 2001 From: raiym Date: Tue, 13 Dec 2016 00:12:54 +0300 Subject: [PATCH 11/14] Rename variable --- src/InstagramScraper/Model/Media.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 88980ae2..7b2e8812 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -168,12 +168,12 @@ public static function getCodeFromId($id) $parts = explode('_', $id); $id = $parts[0]; $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'; - $shortenedId = ''; + $code = ''; while ($id > 0) { $remainder = $id % 64; $id = ($id - $remainder) / 64; - $shortenedId = $alphabet{$remainder} . $shortenedId; + $code = $alphabet{$remainder} . $code; }; - return $shortenedId; + return $code; } } From 7a9e36db617a1fbcb5a8a7c2810ea66321226faf Mon Sep 17 00:00:00 2001 From: Fabiano Roberto Date: Wed, 8 Feb 2017 10:46:01 +0100 Subject: [PATCH 12/14] Return count hashtags --- src/InstagramScraper/Instagram.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 15b5894e..ce4d80a8 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -228,13 +228,15 @@ public static function getPaginateMediasByTag($tag, $maxId = '') $medias[] = Media::fromTagPage($mediaArray); } - $maxId = $arr['tag']['media']['page_info']['end_cursor']; + $maxId = $arr['tag']['media']['page_info']['end_cursor']; $hasNextPage = $arr['tag']['media']['page_info']['has_next_page']; + $count = $arr['tag']['media']['count']; $toReturn = [ 'medias' => $medias, + 'count' => $count, 'maxId' => $maxId, - 'hasNextPage' => $hasNextPage + 'hasNextPage' => $hasNextPage, ]; return $toReturn; From 7e3a1dcaf9a1480f341a665fa179fa94084b028b Mon Sep 17 00:00:00 2001 From: raiym Date: Thu, 23 Feb 2017 17:30:42 +0300 Subject: [PATCH 13/14] Fixes #59. Incorrect number of results returned for getMediasByTag(); --- index.php | 5 +++-- src/InstagramScraper/Instagram.php | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/index.php b/index.php index 143d6144..2f342aaf 100644 --- a/index.php +++ b/index.php @@ -9,8 +9,9 @@ try { // $medias = Instagram::getMedias('kevin', 1497); // echo json_encode($medias[1497]); - $media = Instagram::getMediaByCode('BL0k1EXhElI'); - echo json_encode($media); + $medias = InstagramScraper\Instagram::getMediasByTag('paveldurov', 300); + echo sizeof($medias) . '\n'; +// echo json_encode($medias); } catch (\Exception $ex) { print_r($ex); } diff --git a/src/InstagramScraper/Instagram.php b/src/InstagramScraper/Instagram.php index 52c90a30..d83df454 100644 --- a/src/InstagramScraper/Instagram.php +++ b/src/InstagramScraper/Instagram.php @@ -185,6 +185,7 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '') { $index = 0; $medias = []; + $mediaIds = []; $hasNextPage = true; while ($index < $count && $hasNextPage) { $response = Request::get(Endpoints::getMediasJsonByTagLink($tag, $maxId)); @@ -204,7 +205,12 @@ public static function getMediasByTag($tag, $count = 12, $maxId = '') if ($index === $count) { return $medias; } - $medias[] = Media::fromTagPage($mediaArray); + $media = Media::fromTagPage($mediaArray); + if (in_array($media->id, $mediaIds)) { + return $medias; + } + $mediaIds[] = $media->id; + $medias[] = $media; $index++; } if (count($nodes) == 0) { @@ -253,14 +259,14 @@ public static function getPaginateMediasByTag($tag, $maxId = '') $medias[] = Media::fromTagPage($mediaArray); } - $maxId = $arr['tag']['media']['page_info']['end_cursor']; + $maxId = $arr['tag']['media']['page_info']['end_cursor']; $hasNextPage = $arr['tag']['media']['page_info']['has_next_page']; - $count = $arr['tag']['media']['count']; + $count = $arr['tag']['media']['count']; $toReturn = [ - 'medias' => $medias, - 'count' => $count, - 'maxId' => $maxId, + 'medias' => $medias, + 'count' => $count, + 'maxId' => $maxId, 'hasNextPage' => $hasNextPage, ]; From 7cf9783753d2915862a8d592d088bfc4e93344b5 Mon Sep 17 00:00:00 2001 From: Fabiano Roberto Date: Fri, 24 Feb 2017 14:53:33 +0100 Subject: [PATCH 14/14] Handle new album post --- src/InstagramScraper/Model/Media.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/InstagramScraper/Model/Media.php b/src/InstagramScraper/Model/Media.php index 7b2e8812..089f0427 100644 --- a/src/InstagramScraper/Model/Media.php +++ b/src/InstagramScraper/Model/Media.php @@ -55,9 +55,11 @@ public static function fromApi($mediaArray) if (isset($mediaArray['video_views'])) { $instance->videoViews = $mediaArray['video_views']; } - $instance->videoLowResolutionUrl = $mediaArray['videos']['low_resolution']['url']; - $instance->videoStandardResolutionUrl = $mediaArray['videos']['standard_resolution']['url']; - $instance->videoLowBandwidthUrl = $mediaArray['videos']['low_bandwidth']['url']; + if (isset($mediaArray['videos'])) { + $instance->videoLowResolutionUrl = $mediaArray['videos']['low_resolution']['url']; + $instance->videoStandardResolutionUrl = $mediaArray['videos']['standard_resolution']['url']; + $instance->videoLowBandwidthUrl = $mediaArray['videos']['low_bandwidth']['url']; + } } if (isset($mediaArray['location']['id'])) { $instance->locationId = $mediaArray['location']['id']; @@ -176,4 +178,4 @@ public static function getCodeFromId($id) }; return $code; } -} +} \ No newline at end of file